# In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Step 1: Read the CSV export into a DataFrame and preview it
file = 'E:/Sri Lakshmi/data.csv'
df = pd.read_csv(file)

# Show the leading rows as a quick sanity check of the load
print(df.head())

# Step 2: Histogram of gross gaming revenue with a KDE curve overlaid
plt.figure(figsize=(10, 6))
# histplot returns the Axes it drew on, so labels are set on it directly
revenue_hist_ax = sns.histplot(df['Total Gross Gaming Revenue'], kde=True)
revenue_hist_ax.set(
    title='Distribution of Total Gross Gaming Revenue',
    xlabel='Total Gross Gaming Revenue',
    ylabel='Frequency',
)
plt.show()

# Step 3: Descriptive statistics for the dataset
summary_stats = df.describe()
print(summary_stats)

# Step 4: Pairwise correlations between variables
# Only float64/int64 columns are passed to corr()
numeric_df = df.select_dtypes(include=['float64', 'int64'])
correlation_matrix = numeric_df.corr()

# Annotated heatmap of the correlation coefficients
plt.figure(figsize=(12, 8))
correlation_ax = sns.heatmap(
    correlation_matrix,
    annot=True,
    cmap='coolwarm',
)
correlation_ax.set_title('Correlation Matrix')
plt.show()

# Bar chart of gross gaming revenue per licensee (error bars disabled).
# NOTE(review): sns.barplot combines rows that share an x value using its
# default estimator (the mean), so each bar is a per-licensee average rather
# than a sum -- confirm that matches the "Total" wording in the title.
plt.figure(figsize=(12, 6))
licensee_ax = sns.barplot(
    data=df,
    x='Licensee',
    y='Total Gross Gaming Revenue',
    errorbar=None,
)
licensee_ax.set(
    title='Total Gross Gaming Revenue by Licensee',
    xlabel='Licensee',
    ylabel='Total Gross Gaming Revenue',
)
# Vertical tick labels
plt.xticks(rotation=90)
plt.show()

# Scatter plot: one point per row, wagers on x and patron winnings on y
plt.figure(figsize=(10, 6))
wagers_ax = sns.scatterplot(
    data=df,
    x='Wagers',
    y='Patron Winnings',
)
wagers_ax.set(
    title='Wagers vs. Patron Winnings',
    xlabel='Wagers',
    ylabel='Patron Winnings',
)
plt.show()

# Step 5: Compare regions by summed revenue.
# The whole step is skipped when the dataset has no 'Region' column.
if 'Region' in df.columns:
    # One row per region holding the sum of its gross gaming revenue
    region_summary = (
        df.groupby('Region')['Total Gross Gaming Revenue']
        .sum()
        .reset_index()
    )

    plt.figure(figsize=(12, 6))
    region_ax = sns.barplot(
        data=region_summary,
        x='Region',
        y='Total Gross Gaming Revenue',
        errorbar=None,
    )
    region_ax.set(
        title='Total Gross Gaming Revenue by Region',
        xlabel='Region',
        ylabel='Total Gross Gaming Revenue',
    )
    # Slant the tick labels
    plt.xticks(rotation=45)
    plt.show()

# Step 6: Handle missing data and outliers
# Only float64/int64 columns are imputed and capped; all other columns are
# left untouched.
numeric_cols = df.select_dtypes(include=['float64', 'int64']).columns

# Replace missing values with the median of their own column.
df[numeric_cols] = df[numeric_cols].fillna(df[numeric_cols].median())

# Cap the upper tail of every numeric column at its 99th percentile.
# Only high values are limited; low values are not touched.
# Series.clip does this in one vectorised call instead of invoking a Python
# lambda once per cell.
for col in numeric_cols:
    percentile_99 = df[col].quantile(0.99)
    df[col] = df[col].clip(upper=percentile_99)

# Step 7: Summarize findings in a report
# NOTE(review): the 'Insights' and 'Recommendations' entries are fixed
# strings; nothing in this block derives them from the data. The statistics
# and correlation matrix are whatever `summary_stats` / `correlation_matrix`
# held when this block runs.
report = {
    'Summary Statistics': summary_stats,
    'Correlation Matrix': correlation_matrix,
    'Insights': [
        'Certain licensees dominate in total gross gaming revenue.',
        'There is a strong positive correlation between wagers and patron winnings.',
        'Regions with higher total gross gaming revenue might indicate more successful platforms.'
    ],
    'Recommendations': [
        'Focus on licensees with higher revenues for marketing campaigns.',
        'Explore further the strong correlation between wagers and patron winnings for potential insights.',
        'Develop targeted strategies for regions with higher revenues.'
    ]
}

# Render every section exactly once so the console output and report.txt
# cannot drift apart. Each section is a blank line, "<name>:", then the
# string form of its content.
report_text = ''.join(
    f"\n{section}:\n{content!s}\n" for section, content in report.items()
)

# Print the report (the text already ends with a newline)
print(report_text, end='')

# Save the cleaned data and report
df.to_csv('cleaned_casino_gaming_dataset.csv', index=False)
# A dedicated handle name keeps the module-level `file` path variable from
# being rebound to a closed file object; the encoding is pinned so the output
# does not depend on the platform default.
with open('report.txt', 'w', encoding='utf-8') as report_file:
    report_file.write(report_text)

# Notebook cell output (not code). NOTE(review): the traceback below reports that the installed Pillow extension (PIL/_imaging) was built for x86_64 while the running interpreter needs arm64; reinstalling Pillow for the interpreter's native architecture should presumably resolve it.
# ImportError: dlopen(/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/PIL/_imaging.cpython-311-darwin.so, 0x0002): tried: '/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/PIL/_imaging.cpython-311-darwin.so' (mach-o file, but is an incompatible architecture (have 'x86_64', need 'arm64')), '/System/Volumes/Preboot/Cryptexes/OS/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/PIL/_imaging.cpython-311-darwin.so' (no such file), '/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/PIL/_imaging.cpython-311-darwin.so' (mach-o file, but is an incompatible architecture (have 'x86_64', need 'arm64'))