<a href="https://colab.research.google.com/github/poronita/Credit-Card-Spending-Habits-in-India/blob/main/Credit_Card_Spending_Habits_in_India.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [31]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


In [4]:
# Load the data
file_path = "/content/Credit card transactions - India - Simple.csv"
data = pd.read_csv(file_path)

In [32]:
# Convert date to datetime and fill missing values or drop duplicates if necessary
data['Date'] = pd.to_datetime(data['Date'], errors='coerce')
data.dropna(inplace=True)

In [33]:
# Removing outliers (simplified method)
Q1 = data['Amount'].quantile(0.25)
Q3 = data['Amount'].quantile(0.75)
IQR = Q3 - Q1
data = data[(data['Amount'] >= Q1 - 1.5 * IQR) & (data['Amount'] <= Q3 + 1.5 * IQR)]

In [38]:
# Assuming 'Amount' is the target and others are features
X = data.drop(['Amount', 'Date'], axis=1)  # Drop Date for simplicity, or encode it
y = data['Amount']

# Convert categorical variables to dummy variables
X = pd.get_dummies(X)

# Splitting the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scaling features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [39]:
from sklearn.metrics import mean_squared_error

mse = mean_squared_error(y_test, predictions)
print(f'Mean Squared Error: {mse}')


Mean Squared Error: 7479803580.305944


In [40]:
# EDA and Visualizations for Dashboard
# Monthly spending trend
monthly_spending = data.groupby(data['Date'].dt.to_period('M'))['Amount'].sum().reset_index()
monthly_spending['Date'] = monthly_spending['Date'].astype(str)  # Convert to string for plotting
fig_monthly_trend = px.line(monthly_spending, x='Date', y='Amount', title='Monthly Spending Trend')

In [41]:
# Top 10 Cities by Spending
top_cities = data.groupby('City')['Amount'].sum().nlargest(10).reset_index()
fig_top_cities = px.bar(top_cities, x='City', y='Amount', title='Top 10 Cities by Spending')



In [42]:
# Dash App for Dashboard
app = dash.Dash(__name__)
app.layout = html.Div([
    html.H1('Credit Card Spending Analysis Dashboard'),
    dcc.Graph(figure=fig_monthly_trend),
    dcc.Graph(figure=fig_top_cities),
    # Add more dcc.Graph or html.Components for other analyses
])

if __name__ == '__main__':
    app.run_server(debug=True)

<IPython.core.display.Javascript object>