In [None]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

# Step 1: Read the two Excel workbooks into DataFrames.
# `japanese` carries the PURCHASE label and is the training set further down;
# `indian` has no PURCHASE column and is the set that gets scored.
japanese = pd.read_excel(io='/content/JPN Data (1).xlsx')
indian = pd.read_excel(io='/content/IN_Data (1).xlsx')



In [None]:
# Preview the first five rows of the Indian data as loaded.
indian.head(n=5)

Unnamed: 0,ID,CURR_AGE,GENDER,ANN_INCOME,DT_MAINT
0,20710B05XL,54,M,1425390,2018-04-20
1,89602T51HX,47,M,1678954,2018-06-08
2,70190Z52IP,60,M,931624,2017-07-31
3,25623V15MU,55,F,1106320,2017-07-31
4,36230I68CE,32,F,748465,2019-01-27


In [None]:
# Preview the first five rows of the Japanese (labelled) data as loaded.
japanese.head(n=5)

Unnamed: 0,ID,CURR_AGE,GENDER,ANN_INCOME,AGE_CAR,PURCHASE
0,00001Q15YJ,50,M,445344.0,439,0
1,00003I71CQ,35,M,107634.0,283,0
2,00003N47FS,59,F,502786.666667,390,1
3,00005H41DE,43,M,585664.0,475,0
4,00007E17UM,39,F,705722.666667,497,1


In [None]:
# Step 2: Parse DT_MAINT as datetime, but only while the column is still present.
# Step 4 removes DT_MAINT, so without this guard re-running the cell afterwards
# raises KeyError.
if 'DT_MAINT' in indian.columns:
    # errors='coerce' turns unparseable values into NaT instead of raising.
    indian['DT_MAINT'] = pd.to_datetime(indian['DT_MAINT'], errors='coerce')


In [None]:
# Step 3: Derive AGE_CAR as the absolute number of days between DT_MAINT and the
# reference date 2019-07-07, giving the Indian frame the AGE_CAR column that the
# Japanese data already has.
ref_date = pd.Timestamp('2019-07-07')
# Guard so that re-running this cell after DT_MAINT has been dropped (Step 4)
# keeps the existing AGE_CAR instead of raising KeyError.
if 'DT_MAINT' in indian.columns:
    # Rows whose DT_MAINT is NaT yield NaN here.
    indian['AGE_CAR'] = (indian['DT_MAINT'] - ref_date).dt.days.abs()

In [None]:
# Step 4: Drop DT_MAINT now that AGE_CAR has been derived from it.
# errors='ignore' makes the cell safe to re-run once the column is already gone,
# and rebinding (rather than inplace=True) avoids mutating the frame in place.
indian = indian.drop(columns=['DT_MAINT'], errors='ignore')

In [None]:
# Check the Indian frame after the date column was replaced by AGE_CAR.
indian.head(n=5)

Unnamed: 0,ID,CURR_AGE,GENDER,ANN_INCOME,AGE_CAR
0,20710B05XL,54,M,1425390,443
1,89602T51HX,47,M,1678954,394
2,70190Z52IP,60,M,931624,706
3,25623V15MU,55,F,1106320,706
4,36230I68CE,32,F,748465,161


In [None]:
# Step 5: Encode GENDER as integers in both frames.
# The encoder is fitted on the Japanese column only and then reused for the
# Indian column, so both frames share a single label -> integer mapping.
le = LabelEncoder().fit(japanese['GENDER'])
japanese['GENDER'] = le.transform(japanese['GENDER'])
indian['GENDER'] = le.transform(indian['GENDER'])

In [None]:
# Step 6: Split the Japanese data into the model inputs and the label.
feature_cols = ['CURR_AGE', 'ANN_INCOME', 'GENDER', 'AGE_CAR']
X_train = japanese[feature_cols]   # predictors
y_train = japanese['PURCHASE']     # binary target (0 / 1)


In [None]:
# Step 7: Build the Indian feature matrix from exactly the columns used for
# training, in the same order. Taking the list from X_train avoids keeping a
# second hard-coded copy that could drift out of sync with Step 6.
X_indian = indian[list(X_train.columns)]

In [None]:
# Step 8: Train the model.
# The features sit on very different scales (ANN_INCOME in the hundreds of
# thousands to millions, GENDER as 0/1). Fitting logistic regression on raw
# values hampers solver convergence and makes the default L2 penalty act
# unevenly across features, so the inputs are standardised first.
# Wrapping both steps in one pipeline means `model.predict(...)` applies the
# scaling learnt from the training data automatically.
# The fitted LogisticRegression itself is available as `model[-1]`.
# NOTE(review): the Indian incomes shown above look systematically larger than
# the Japanese ones - confirm both are in comparable units before relying on
# the predictions.
model = make_pipeline(StandardScaler(), LogisticRegression())
model.fit(X_train, y_train)

In [None]:
# Step 9: Score every Indian row with the trained model and store the predicted
# class next to the original columns.
predicted_labels = model.predict(X_indian)
indian['Predicted_Purchase'] = predicted_labels
# Display the new column as the cell output.
indian['Predicted_Purchase']

Unnamed: 0,Predicted_Purchase
0,1
1,1
2,1
3,1
4,1
...,...
69995,1
69996,1
69997,1
69998,1


``

           ID  CURR_AGE  GENDER  ANN_INCOME  AGE_CAR  Predicted_Purchase
0  20710B05XL        54       1     1425390      443                   1
1  89602T51HX        47       1     1678954      394                   1
2  70190Z52IP        60       1      931624      706                   1
3  25623V15MU        55       0     1106320      706                   1
4  36230I68CE        32       0      748465      161                   1


In [None]:
# Class balance of the training label in the Japanese data.
purchase_counts = japanese['PURCHASE'].value_counts()
print(purchase_counts)


PURCHASE
1    23031
0    16969
Name: count, dtype: int64


In [None]:
# Distribution of the predicted classes over the Indian data.
predicted_counts = indian['Predicted_Purchase'].value_counts()
print(predicted_counts)


Predicted_Purchase
1    67432
0     2568
Name: count, dtype: int64


In [None]:
# Keep only the Japanese rows labelled as non-purchasers (PURCHASE == 0).
is_non_purchase = japanese['PURCHASE'].eq(0)
purchase_0 = japanese.loc[is_non_purchase]

# Print the filtered frame.
print(purchase_0)

# Print how many such rows there are.
print("Number of PURCHASE = 0:", purchase_0.shape[0])


               ID  CURR_AGE  GENDER     ANN_INCOME  AGE_CAR  PURCHASE
0      00001Q15YJ        50       1  445344.000000      439         0
1      00003I71CQ        35       1  107634.000000      283         0
3      00005H41DE        43       1  585664.000000      475         0
6      00015B11UO        54       1   85056.000000      425         0
7      00020K99TA        28       0  453584.000000      173         0
...           ...       ...     ...            ...      ...       ...
39988  99977F94HV        47       1  278285.000000      236         0
39990  99979M68WW        41       0  368506.666667      243         0
39991  99981Z17GC        58       0  263517.333333      510         0
39994  99986U33RY        57       1  682123.000000      511         0
39996  99990Q44VP        28       1  271419.000000       61         0

[16969 rows x 6 columns]
Number of PURCHASE = 0: 16969
