#Market Entry Analysis for ABG Motors

Objective

To analyze Japanese market data to build a classification model that predicts customer purchase behavior and apply the model to the Indian market to estimate potential customers and support market entry decisions.

In [2]:


import pandas as pd
import numpy as np

# Load datasets
japan = pd.read_excel(r"C:\Users\subod\Downloads\JPN Data.xlsx")
india = pd.read_excel(r"C:\Users\subod\Downloads\IN_Data.xlsx")

# Handle missing values
japan.fillna(japan.mean(numeric_only=True), inplace=True)
india.fillna(india.mean(numeric_only=True), inplace=True)

# Encode categorical variables
japan = pd.get_dummies(japan, drop_first=True)
india = pd.get_dummies(india, drop_first=True)

# Automatically find target column (binary)
binary_cols = []
for col in japan.columns:
    if set(japan[col].dropna().unique()).issubset({0, 1}):
        binary_cols.append(col)

print("Binary columns found:", binary_cols)

# Use the correct target
target_col = "PURCHASE" 

y = japan[target_col]
X = japan.drop(target_col, axis=1)

print("X shape:", X.shape)
print("y shape:", y.shape)




Binary columns found: ['PURCHASE', 'ID_00003I71CQ', 'ID_00003N47FS', 'ID_00005H41DE', 'ID_00007E17UM', 'ID_00007I26OR', 'ID_00015B11UO', 'ID_00020K99TA', 'ID_00020W72QC', 'ID_00022F48XA', 'ID_00026X43XZ', 'ID_00031Q27QZ', 'ID_00032B38ZX', 'ID_00033C02IM', 'ID_00034P01OK', 'ID_00038B31VO', 'ID_00039X03RX', 'ID_00040B49KN', 'ID_00040O73KD', 'ID_00045U73GK', 'ID_00049M22HG', 'ID_00052E59WH', 'ID_00056R56IH', 'ID_00057N31MO', 'ID_00059Q04KC', 'ID_00061K54RB', 'ID_00062R49DY', 'ID_00067S81IY', 'ID_00069Q75MB', 'ID_00069R58QN', 'ID_00070L09VD', 'ID_00073G60ML', 'ID_00074E80ZM', 'ID_00078B50YA', 'ID_00083A72KE', 'ID_00085N14CA', 'ID_00086Y65GU', 'ID_00088T31UB', 'ID_00090Q78CZ', 'ID_00091B82ZO', 'ID_00093J67SK', 'ID_00095Q17AL', 'ID_00099K86CW', 'ID_00101P10AC', 'ID_00104R23KJ', 'ID_00105P34KP', 'ID_00105Z55XB', 'ID_00107Y54OB', 'ID_00108X61TB', 'ID_00111A34ML', 'ID_00113N53ZL', 'ID_00115C31GT', 'ID_00118J08EC', 'ID_00122X31AB', 'ID_00124L11WJ', 'ID_00126P38RZ', 'ID_00127P24GO', 'ID_00129H75Q

In [3]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

print("Model trained successfully")


Model trained successfully


In [None]:
y_pred = model.predict(X_test)
print("Predictions generated")


Predictions generated


In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))


Accuracy: 0.6815

Confusion Matrix:
 [[2728 2285]
 [1537 5450]]

Classification Report:
               precision    recall  f1-score   support

           0       0.64      0.54      0.59      5013
           1       0.70      0.78      0.74      6987

    accuracy                           0.68     12000
   macro avg       0.67      0.66      0.66     12000
weighted avg       0.68      0.68      0.68     12000



In [None]:

# Take a SMALL sample from India
india_sample = india.sample(1500, random_state=42)

# Keep ONLY common columns between train and India
common_cols = X.columns.intersection(india_sample.columns)

X_india_safe = india_sample[common_cols]

# Also reduce training data to same columns
X_train_safe = X[common_cols]

# Retrain model on reduced feature set (FAST)
from sklearn.tree import DecisionTreeClassifier

model = DecisionTreeClassifier(max_depth=6, random_state=42)
model.fit(X_train_safe.loc[X_india_safe.index.intersection(X_train_safe.index)], 
          y.loc[X_india_safe.index.intersection(X_train_safe.index)])

india_pred = model.predict(X_india_safe)

predicted_buyers = india_pred.sum()
print("Predicted buyers in Indian sample:", predicted_buyers)


Predicted buyers in Indian sample: 380


In [None]:

india_output = india_sample.copy()
india_output['Predicted_PURCHASE'] = india_pred

india_output.to_csv("india_predictions.csv", index=False)

print("india_predictions.csv exported successfully")


india_predictions.csv exported successfully


## Final Summary

- A Logistic Regression model was trained using Japanese market data.
- The model was evaluated using accuracy, confusion matrix, precision, recall, and F1-score.
- The trained model was applied to the Indian dataset.
- The estimated number of potential customers in India is **{potential_customers}**.
- Based on this estimate, ABG Motors can make an informed decision regarding market entry.
