<a href="https://colab.research.google.com/github/olivercase/EC3365/blob/main/Homework1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
from statsmodels.stats.outliers_influence import variance_inflation_factor

In [None]:
# Read the Credit dataset from the Colab runtime's working directory.
path = '/content/Credit.csv'
df = pd.read_csv(filepath_or_buffer=path)
# Bare last expression: the notebook renders the loaded frame as the cell output.
df

In [None]:
# Rename first so the previews below (and every later cell) show the
# 'Credit Score' column name that the rest of the notebook relies on.
# Re-assigning instead of using inplace=True keeps the cell safe to re-run.
df = df.rename(columns={'Rating': 'Credit Score'})

print(df.head())
# DataFrame.info() writes its report itself and returns None, so wrapping it
# in print() would append a stray "None" line to the output.
df.info()

In [None]:
# --- Numeric columns: describe() plus two extra measures of spread ---
numeric_cols = df.select_dtypes(include=['number'])
summary_stats_numeric = numeric_cols.describe().T

col_max, col_min = numeric_cols.max(), numeric_cols.min()
summary_stats_numeric['range'] = col_max - col_min

upper_quartile = numeric_cols.quantile(0.75)
lower_quartile = numeric_cols.quantile(0.25)
summary_stats_numeric['IQR'] = upper_quartile - lower_quartile

# --- Categorical columns: count of each level, one output column per variable ---
categorical_cols = df.select_dtypes(include=['object', 'category'])
summary_stats_categorical = categorical_cols.apply(lambda col: col.value_counts())

print("Numeric Columns Summary:\n", summary_stats_numeric)
print("\nCategorical Columns Summary:\n", summary_stats_categorical)

In [None]:
# Kernel density estimate of Income.
fig, ax = plt.subplots(figsize=(8, 4))
# `fill=True` is the supported spelling of the deprecated `shade=True`;
# `ax=ax` draws on the axes created above instead of relying on pyplot state.
sns.kdeplot(data=df, x='Income', fill=True, color='#B4BA39', ax=ax)

# Styling
ax.grid(which="major", axis='y', color='#758D99', alpha=0.6, zorder=1)
ax.spines[['top', 'right', 'bottom']].set_visible(False)
ax.spines['left'].set_linewidth(1.1)
# Tick labels are left to Matplotlib's default formatter: overwriting them with
# set_xticklabels(get_xticks()) freezes raw float text (e.g. long density
# decimals) that no longer follows the ticks if the layout changes.
ax.set_title("Density of Income", loc='left', fontsize=13, fontweight='bold')
plt.tight_layout()
plt.show()

In [None]:
# Correlations between the numeric columns only. Without numeric_only=True,
# pandas >= 2.0 raises as soon as the frame contains a non-numeric column
# (the notebook uses 'Region' as a categorical hue elsewhere).
corr_matrix = df.corr(numeric_only=True)

cmap = sns.diverging_palette(230, 20, as_cmap=True)
# True = hidden. Hides the upper triangle (and diagonal), which only mirrors
# the lower triangle.
mask = np.triu(np.ones(corr_matrix.shape, dtype=bool))

# Cells in the 'Credit Score' row or column are the ones to highlight.
in_target_row = (corr_matrix.index == 'Credit Score')[:, np.newaxis]
in_target_col = (corr_matrix.columns == 'Credit Score')[np.newaxis, :]
touches_target = in_target_row | in_target_col

# seaborn requires the mask to have exactly the shape of the data, so both
# layers plot the full matrix and differ only in which cells they hide. The two
# masks are complementary inside the lower triangle, so each visible cell is
# drawn (and annotated) exactly once.
base_mask = mask | touches_target
highlight_mask = mask | ~touches_target

fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(corr_matrix, annot=True, cmap=cmap, fmt=".2f", mask=base_mask, ax=ax)
# A single-colour palette paints every highlighted cell in the accent red.
sns.heatmap(corr_matrix, annot=True, cmap=['#E3120B'], fmt=".2f", cbar=False,
            mask=highlight_mask, ax=ax)

ax.set_title('Correlation Heatmap of Credit', loc='left', fontsize=13, weight='bold')
plt.setp(ax.get_xticklabels(), rotation=45, ha='right', fontsize=10)
plt.setp(ax.get_yticklabels(), rotation=0, fontsize=10)
ax.spines[['top', 'right', 'bottom', 'left']].set_visible(False)
plt.tight_layout()
plt.show()

In [None]:
# An ordered list rather than a set: sets have no defined order, so the colour
# given to each Region could change from one kernel session to the next.
# NOTE(review): three colours assumes Region has three levels - confirm.
economist_palette = ['#E3120B', '#006BA2', '#DB444B']

fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(data=df, x='Income', y='Credit Score', alpha=0.5, hue='Region',
                palette=economist_palette, ax=ax)

# Styling
ax.grid(which="major", axis='y', color='#758D99', alpha=0.6, zorder=1)
ax.spines[['top', 'right', 'bottom']].set_visible(False)
ax.spines['left'].set_linewidth(1.1)
ax.set_xlabel("Income", fontsize=11, alpha=0.8)
ax.set_ylabel("Credit Score", fontsize=11, alpha=0.8)
ax.xaxis.set_tick_params(labelsize=10)
ax.yaxis.set_tick_params(labelsize=10)
# Format ticks as whole numbers through a formatter instead of freezing label
# text with set_xticklabels(): fixed labels go out of sync with the tick
# positions once the legend/tight_layout rescales the axes.
ax.xaxis.set_major_formatter('{x:.0f}')
ax.yaxis.set_major_formatter('{x:.0f}')
ax.set_title('Income vs. Credit Score by Region', loc='left', fontsize=13, fontweight='bold')
ax.legend(title='Region', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()

In [None]:
# An ordered list rather than a set: sets have no defined order, so the colour
# given to each Region could change from one kernel session to the next.
# NOTE(review): three colours assumes Region has three levels - confirm.
my_palette = ['#E3120B', '#006BA2', '#DB444B']

fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(data=df, x='Limit', y='Credit Score', alpha=0.5, hue='Region',
                palette=my_palette, ax=ax)

# Styling
ax.grid(which="major", axis='y', color='#758D99', alpha=0.6, zorder=1)
ax.spines[['top', 'right', 'bottom']].set_visible(False)
ax.spines['left'].set_linewidth(1.1)
ax.set_xlabel("Limit", fontsize=11, alpha=0.8)
ax.set_ylabel("Credit Score", fontsize=11, alpha=0.8)
ax.xaxis.set_tick_params(labelsize=10)
ax.yaxis.set_tick_params(labelsize=10)
# Format ticks as whole numbers through a formatter instead of freezing label
# text with set_xticklabels(): fixed labels go out of sync with the tick
# positions once the legend/tight_layout rescales the axes.
ax.xaxis.set_major_formatter('{x:.0f}')
ax.yaxis.set_major_formatter('{x:.0f}')
ax.set_title('Limit vs. Credit Score by Region', loc='left', fontsize=13, fontweight='bold')
ax.legend(title='Region', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()

In [None]:

# Age against Credit Score, coloured by Region (palette defined in an earlier cell).
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(data=df, x='Age', y='Credit Score', alpha=0.5, hue='Region',
                palette=my_palette, ax=ax)

# Styling
ax.grid(which="major", axis='y', color='#758D99', alpha=0.6, zorder=1)
ax.spines[['top', 'right', 'bottom']].set_visible(False)
ax.spines['left'].set_linewidth(1.1)
ax.set_xlabel("Age", fontsize=11, alpha=0.8)
ax.set_ylabel("Credit Score", fontsize=11, alpha=0.8)
ax.xaxis.set_tick_params(labelsize=10)
ax.yaxis.set_tick_params(labelsize=10)
# Format ticks as whole numbers through a formatter instead of freezing label
# text with set_xticklabels(): fixed labels go out of sync with the tick
# positions once the legend/tight_layout rescales the axes.
ax.xaxis.set_major_formatter('{x:.0f}')
ax.yaxis.set_major_formatter('{x:.0f}')
ax.set_title('Age vs. Credit Score by Region', loc='left', fontsize=13, fontweight='bold')
ax.legend(title='Region', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()

In [None]:

# Histogram of Age (counts per bin, no density curve).
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(df['Age'], kde=False, color='#006BA2', ax=ax)

# Styling: horizontal grid lines only, with just the left spine kept.
ax.grid(which="major", axis='y', color='#758D99', alpha=0.6, zorder=1)
for side in ('top', 'right', 'bottom'):
    ax.spines[side].set_visible(False)
ax.spines['left'].set_linewidth(1.1)

label_style = dict(fontsize=11, alpha=0.8)
ax.set_xlabel("Age", **label_style)
ax.set_ylabel("Frequency", **label_style)
ax.tick_params(axis='both', labelsize=10)

ax.set_title('Age Distribution', loc='left', fontsize=13, fontweight='bold')
plt.tight_layout()
plt.show()

In [None]:
# Continuous diverging colormap used to shade points by Credit Score.
credit_score_palette = sns.diverging_palette(230, 20, as_cmap=True)

fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(
    data=df,
    x='Income',
    y='Limit',
    hue='Credit Score',
    palette=credit_score_palette,
    ax=ax,
)

# Styling: this figure keeps the bottom and left spines and drops top/right.
ax.grid(which="major", axis='y', color='#758D99', alpha=0.6, zorder=1)
for side in ('top', 'right'):
    ax.spines[side].set_visible(False)
ax.spines['bottom'].set_linewidth(1.1)

label_style = dict(fontsize=11, alpha=0.8)
ax.set_xlabel("Income", **label_style)
ax.set_ylabel("Limit", **label_style)
ax.tick_params(axis='both', labelsize=10)

ax.set_title('Income vs. Limit by Credit Score', loc='left', fontsize=13, fontweight='bold')
ax.legend(title='Credit Score', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()

In [None]:

# Histogram of Income with a kernel density curve overlaid.
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(df['Income'], kde=True, color='#006BA2', ax=ax)

# Styling: horizontal grid lines only, with just the left spine kept.
ax.grid(which="major", axis='y', color='#758D99', alpha=0.6, zorder=1)
for side in ('top', 'right', 'bottom'):
    ax.spines[side].set_visible(False)
ax.spines['left'].set_linewidth(1.1)

label_style = dict(fontsize=11, alpha=0.8)
ax.set_xlabel("Income", **label_style)
ax.set_ylabel("Frequency", **label_style)
ax.tick_params(axis='both', labelsize=10)

ax.set_title('Income Distribution', loc='left', fontsize=13, fontweight='bold')
plt.tight_layout()
plt.show()

In [None]:
# Simple OLS: Credit Score regressed on an intercept and Income.
X_income = sm.add_constant(df['Income'])
income_ols = sm.OLS(endog=df['Credit Score'], exog=X_income)
model_income = income_ols.fit()

print("Regression: Income vs Credit Score", model_income.summary(), sep="\n")

In [None]:
# Simple OLS: Credit Score regressed on an intercept and Limit.
X_limit = sm.add_constant(df['Limit'])
limit_ols = sm.OLS(endog=df['Credit Score'], exog=X_limit)
model_limit = limit_ols.fit()

print("\nRegression: Limit vs Credit Score", model_limit.summary(), sep="\n")

In [None]:
def plot_regression(df, x_var, y_var, model, ax, title):
    """Scatter ``y_var`` against ``x_var`` and overlay the fitted regression line.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the columns named by ``x_var`` and ``y_var``.
    x_var, y_var : str
        Column names for the horizontal and vertical axes.
    model : fitted regression results
        Object whose ``params`` holds the intercept first and the slope of
        ``x_var`` second (the order produced by fitting on
        ``sm.add_constant(df[x_var])``).
    ax : matplotlib.axes.Axes
        Axes to draw on; it is styled in place.
    title : str
        Left-aligned panel title.
    """
    sns.scatterplot(data=df, x=x_var, y=y_var, color='#006BA2', ax=ax)

    # Read the coefficients by position from a plain array. ``params`` is a
    # Series labelled 'const'/<x_var> when the model was fitted on pandas data,
    # and ``params[0]`` on such a Series is deprecated positional indexing that
    # newer pandas treats as a (missing) label lookup.
    intercept, slope = np.asarray(model.params)[:2]

    # Evaluate the fitted line on an even grid spanning the observed x range.
    x_values = np.linspace(df[x_var].min(), df[x_var].max(), 100)
    y_pred = intercept + slope * x_values
    sns.lineplot(x=x_values, y=y_pred, color='#E3120B', ax=ax)

    ax.set_title(title, fontsize=13, fontweight='bold', loc='left')
    ax.grid(which="major", axis='y', color='#758D99', alpha=0.6)
    ax.spines[['top', 'right', 'bottom']].set_visible(False)
    ax.spines['left'].set_linewidth(1.1)

# Side-by-side panels: one fitted simple regression per predictor.
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(15, 6))
panels = [
    ('Income', model_income, 'Income vs Credit Score'),
    ('Limit', model_limit, 'Limit vs Credit Score'),
]
for panel_ax, (predictor, fitted_model, panel_title) in zip(axes, panels):
    plot_regression(df, predictor, 'Credit Score', fitted_model, panel_ax, panel_title)
plt.tight_layout()
plt.show()


In [None]:
# Multiple OLS: Credit Score regressed on an intercept, Income and Limit.
Y_combine = df['Credit Score']
X_combine = sm.add_constant(df[['Income', 'Limit']])

combined_ols = sm.OLS(endog=Y_combine, exog=X_combine)
model_combine = combined_ols.fit()

print("\nRegression: Limit and Income vs Credit Score", model_combine.summary(), sep="\n")

In [None]:
# Variance inflation factor for every column of the combined design matrix.
# `pd.df` does not exist; the table is built with pd.DataFrame in one step.
# NOTE(review): the 'const' column is included, as in the original loop; its
# VIF is usually not interpreted - confirm whether it should be dropped.
vif_data = pd.DataFrame({
    'Feature': X_combine.columns,
    'VIF': [
        variance_inflation_factor(X_combine.values, col_idx)
        for col_idx in range(X_combine.shape[1])
    ],
})
# Bare last expression so the notebook actually shows the table.
vif_data