In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix

In [2]:
japanese_data = pd.read_excel("JPN Data.xlsx")
indian_data = pd.read_excel("IN_Data.xlsx")

In [3]:
le = LabelEncoder()
japanese_data['GENDER_ENC'] = le.fit_transform(japanese_data['GENDER'])

In [4]:
features = ['CURR_AGE', 'GENDER_ENC', 'ANN_INCOME', 'AGE_CAR']
target = 'PURCHASE'

In [5]:
X = japanese_data[features]
y = japanese_data[target]

In [6]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

In [8]:
model = LogisticRegression()
model.fit(X_train, y_train)

In [9]:
y_pred = model.predict(X_test)
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

Confusion Matrix:
 [[1833 1516]
 [1013 3638]]

Classification Report:
               precision    recall  f1-score   support

           0       0.64      0.55      0.59      3349
           1       0.71      0.78      0.74      4651

    accuracy                           0.68      8000
   macro avg       0.67      0.66      0.67      8000
weighted avg       0.68      0.68      0.68      8000



In [10]:
print("\nModel Coefficients:")
for feat, coef in zip(features, model.coef_[0]):
    print(f"{feat}: {coef:.4f}")


Model Coefficients:
CURR_AGE: -0.1269
GENDER_ENC: 0.1091
ANN_INCOME: 0.4047
AGE_CAR: 0.8464


In [11]:
indian_data['GENDER_ENC'] = le.transform(indian_data['GENDER'])

In [12]:
avg_age_car = japanese_data['AGE_CAR'].mean()
indian_data['AGE_CAR'] = avg_age_car

In [13]:
X_india = indian_data[['CURR_AGE', 'GENDER_ENC', 'ANN_INCOME', 'AGE_CAR']]
X_india_scaled = scaler.transform(X_india)

In [14]:
india_predictions = model.predict(X_india_scaled)
indian_data['PREDICTED_PURCHASE'] = india_predictions

In [15]:
likely_buyers = indian_data['PREDICTED_PURCHASE'].sum()
print(f"\nLikely Car Purchases in Indian Market: {likely_buyers} out of {len(indian_data)}")


Likely Car Purchases in Indian Market: 69919 out of 70000


In [16]:
indian_data.to_excel("Predicted_India.xlsx", index=False)

In [18]:
import pandas as pd
from datetime import datetime
from sklearn.preprocessing import StandardScaler

# Load Indian dataset
indian_data = pd.read_excel("IN_Data.xlsx")

# Encode gender if not already done
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
indian_data['GENDER_ENC'] = le.fit_transform(indian_data['GENDER'])

# Convert maintenance date and compute AGE_CAR
indian_data['DT_MAINT'] = pd.to_datetime(indian_data['DT_MAINT'])
today = pd.to_datetime("today")
indian_data['AGE_CAR'] = (today - indian_data['DT_MAINT']).dt.days // 30

# Scale and predict using pre-trained model
X_india_updated = indian_data[['CURR_AGE', 'GENDER_ENC', 'ANN_INCOME', 'AGE_CAR']]
X_india_scaled_updated = scaler.transform(X_india_updated)

# Predict
indian_data['PREDICTED_PURCHASE'] = model.predict(X_india_scaled_updated)

# Save the final output
indian_data.to_excel("Corrected_Predicted_India.xlsx", index=False)
