In [None]:
import pandas as pd
import numpy as np
from scipy.stats import norm
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import datetime

# Key Question: How do people's demographics and income level affect their preference for and acceptance of self-driving cars?

### Preferences for self-driving cars by age, gender, education, and income levels
### Correlations between these variables

In [None]:
# Location of the raw assignment CSV on GitHub
DATA_URL = 'https://raw.githubusercontent.com/EvaW01/schulich_ai_fundamentals/main/MMAI_Fundamental_Assignment.csv'

# Load the raw file into the notebook-level frame used by later cells
df = pd.read_csv(DATA_URL)

# Preview the first rows as a quick check of the load
df.head()


In [None]:
# Distinct values present in the 'Age' column, in order of first appearance
unique_ages = df.loc[:, 'Age'].unique()
unique_ages

In [None]:
from scipy.stats import chi2_contingency

In [None]:
# Function to perform chi-square test and print the result
def chi_square_test(col1, col2, data=None):
    """Run a chi-square test of independence between two columns and print it.

    Cross-tabulates ``col1`` against ``col2`` with ``pd.crosstab``, passes the
    table to ``scipy.stats.chi2_contingency`` and prints the statistic, the
    p-value, the degrees of freedom and the expected frequencies.

    Parameters
    ----------
    col1, col2 : str
        Names of the two columns to cross-tabulate.
    data : pandas.DataFrame, optional
        Frame that holds both columns. When omitted, the notebook-level ``df``
        is used, so existing two-argument calls keep working unchanged.

    Returns
    -------
    None
        Results are printed only; returning nothing keeps a call on the last
        line of a cell from echoing a value in the notebook output.

    Raises
    ------
    KeyError
        If ``col1`` or ``col2`` is not a column of the frame.
    """
    # Only touch the global frame when the caller did not supply one, so the
    # function can be reused (and tested) on any DataFrame.
    frame = df if data is None else data

    contingency_table = pd.crosstab(frame[col1], frame[col2])
    chi2, p, dof, expected = chi2_contingency(contingency_table)

    print(f"Chi-square test between {col1} and {col2}:")
    print(f"Chi2: {chi2}, p-value: {p}, Degrees of freedom: {dof}")
    print("Expected frequencies:")
    print(expected)
    print("\n")

# Chi-square tests of Gender against each self-driving-car column
for outcome in ('Read_about_SelfDrive_Car', 'Widespread_use_SelfDrive_Car'):
    chi_square_test('Gender', outcome)

In [None]:
# Chi-square tests of Education against each self-driving-car column
for outcome in ('Read_about_SelfDrive_Car', 'Widespread_use_SelfDrive_Car'):
    chi_square_test('Education', outcome)

In [None]:
# Chi-square tests of Income_Level against each self-driving-car column
for outcome in ('Read_about_SelfDrive_Car', 'Widespread_use_SelfDrive_Car'):
    chi_square_test('Income_Level', outcome)

In [None]:
# Chi-square tests of Age against each self-driving-car column
for outcome in ('Read_about_SelfDrive_Car', 'Widespread_use_SelfDrive_Car'):
    chi_square_test('Age', outcome)

In [None]:
# Count plots of 'Read_about_SelfDrive_Car', one figure per demographic column

# Column name -> label shown in the figure title
title_labels = {
    'Age': 'Age',
    'Gender': 'Gender',
    'Education': 'Education',
    'Income_Level': 'Income Level',
}

for column, label in title_labels.items():
    plt.figure(figsize=(12, 6))
    sns.countplot(data=df, x=column, hue='Read_about_SelfDrive_Car')
    plt.title(f'Preferences for Self-Driving Cars by {label}')
    plt.ylabel('Count')
    plt.xticks(rotation=45)
    plt.show()


In [None]:
# Count plots of 'Widespread_use_SelfDrive_Car', one figure per demographic column.
# The titles name the plotted variable so these figures are not confused with
# the 'Read_about_SelfDrive_Car' plots, which previously carried identical titles.

# Column name -> label shown in the figure title
title_labels = {
    'Age': 'Age',
    'Gender': 'Gender',
    'Education': 'Education',
    'Income_Level': 'Income Level',
}

for column, label in title_labels.items():
    plt.figure(figsize=(12, 6))
    sns.countplot(data=df, x=column, hue='Widespread_use_SelfDrive_Car')
    plt.title(f'Widespread Use of Self-Driving Cars by {label}')
    plt.ylabel('Count')
    plt.xticks(rotation=45)  # category labels can be long; tilt them to avoid overlap
    plt.show()
