In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read the whole loan dataset into a single DataFrame.
# low_memory=False tells pandas not to parse the file in internal chunks,
# so each column's dtype is inferred from the full column at once.
loan_data = pd.read_csv(filepath_or_buffer='loan.csv', low_memory=False)

# Keep a preview of the first five rows to inspect the table's structure.
loan_data_head = loan_data.head(n=5)

# Bare last expression so the notebook renders the preview as a rich table.
loan_data_head


In [None]:
# Univariate Analysis: Distribution of loan amount

# Summary statistics for the loan amount column, as returned by describe().
loan_amount_metrics = loan_data['loan_amnt'].describe()
print("Loan Amount Metrics:\n", loan_amount_metrics)

# Use a dedicated single-axes figure so the histogram fills the canvas;
# placing it in slot 1 of a 1x3 subplot grid would leave two-thirds of the
# figure blank.
fig, ax = plt.subplots(figsize=(8, 5))

# 30-bin histogram with a kernel density estimate overlaid.
sns.histplot(loan_data['loan_amnt'], bins=30, kde=True, color='blue', ax=ax)
ax.set_title('Distribution of Loan Amount')
ax.set_xlabel('Loan Amount')
ax.set_ylabel('Frequency')

fig.tight_layout()
plt.show()


In [None]:
# Univariate Analysis: Distribution of interest rate

# Convert 'int_rate' from strings with a trailing '%' to floats.
# The dtype guard keeps this cell idempotent: once the column is numeric
# (after a first run, or if the CSV already stores numbers), the .str
# accessor would raise AttributeError, so the conversion is skipped.
if not pd.api.types.is_numeric_dtype(loan_data['int_rate']):
    loan_data['int_rate'] = loan_data['int_rate'].str.rstrip('%').astype('float')

# Summary statistics for the interest rate column, as returned by describe().
interest_rate_metrics = loan_data['int_rate'].describe()

print("\nInterest Rate Metrics:\n", interest_rate_metrics)

# Use a dedicated single-axes figure so the histogram fills the canvas;
# placing it in slot 2 of a 1x3 subplot grid would leave two-thirds of the
# figure blank.
fig, ax = plt.subplots(figsize=(8, 5))

# 30-bin histogram with a kernel density estimate overlaid.
sns.histplot(loan_data['int_rate'], bins=30, kde=True, color='green', ax=ax)
ax.set_title('Distribution of Interest Rate')
ax.set_xlabel('Interest Rate (%)')
ax.set_ylabel('Frequency')

fig.tight_layout()
plt.show()

In [None]:
# Univariate Analysis: Distribution of loan status

# Number of loans per status. Computed once and reused for both the printed
# table and the bar chart.
loan_status_counts = loan_data['loan_status'].value_counts()
print(loan_status_counts)

# Use a dedicated single-axes figure so the bar chart fills the canvas;
# placing it in slot 3 of a 1x3 subplot grid would leave two-thirds of the
# figure blank.
fig, ax = plt.subplots(figsize=(8, 5))

sns.barplot(
    x=loan_status_counts.index,
    y=loan_status_counts.values,
    # hue mirrors x so the palette assigns one colour per status; the
    # resulting legend would duplicate the x-axis labels, hence legend=False.
    hue=loan_status_counts.index,
    palette='viridis',
    legend=False,
    ax=ax,
)
ax.set_title('Loan Status Distribution')
ax.set_xlabel('Loan Status')
ax.set_ylabel('Count')
# Rotate the x tick labels by 90 degrees.
ax.tick_params(axis='x', labelrotation=90)

fig.tight_layout()
plt.show()

### Key Findings
1. **Loan Amount Distribution**: Most loans are relatively small, with the highest frequency of loans under \$20,000.

Most borrowers opt for loans between \$5,500 and \$15,000, with a skew towards smaller loan amounts. This could indicate that the company primarily serves middle-market or risk-averse customers who prefer lower debt obligations.

2. **Interest Rate Distribution**: Most interest rates fall between roughly 9% and 15%.

A broad pricing strategy is implemented, where most customers are charged between 9.25% and 14.59%, with some exceptions. These rates are likely determined by a risk-based pricing model, associating higher rates with higher-risk borrowers.

3. **Loan Status Distribution**: The majority of loans are either fully paid or charged off (defaulted), with a smaller proportion labeled as current.

The majority of loans are successfully repaid, indicating that the company's lending criteria and risk management are effective in most cases. However, the 14% charge-off rate suggests room for improvement in credit assessment and risk management to reduce the default rate.