<a href="https://colab.research.google.com/github/lalitha12-web/Telecom-Churn-Group-Case-Study/blob/main/Telecom_Churn_Group_Case_Study.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
#Data Structures
import pandas as pd
import numpy as np
import re
import os

# To install the missingno library, run the command below in a terminal
# (or run `%pip install missingno` in a notebook cell so it targets this kernel):
#pip install missingno

import missingno as msno

#Sklearn
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import confusion_matrix, precision_score, recall_score

#Plotting
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns

#Others
import warnings
# Install a global "ignore" filter: with no category/module arguments this
# suppresses every warning raised for the rest of the session.
# NOTE(review): this also hides deprecation and similar library warnings —
# consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

%matplotlib inline

In [1]:

# Colab-specific step: mount Google Drive at /content/drive so that files
# stored there can be opened through ordinary filesystem paths.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [6]:
# Load the training CSV from the mounted Drive folder into a DataFrame.
filepath = "/content/drive/MyDrive/IIITB-AIML/Telecom Churn Group Case Study/train.csv"
train_data = pd.read_csv(filepath_or_buffer=filepath)


In [7]:
# First look at the training frame: its (rows, columns) shape, then the top rows.
shape_of_train = train_data.shape
print("Shape of the training data:", shape_of_train)
print("\nFirst few rows of the training data:")
print(train_data.head(n=5))

Shape of the training data: (11652, 172)

First few rows of the training data:
   id  circle_id  loc_og_t2o_mou  std_og_t2o_mou  loc_ic_t2o_mou  \
0   0        109             0.0             0.0             0.0   
1   1        109             0.0             0.0             0.0   
2   2        109             0.0             0.0             0.0   
3   3        109             0.0             0.0             0.0   
4   4        109             0.0             0.0             0.0   

  last_date_of_month_6 last_date_of_month_7 last_date_of_month_8   arpu_6  \
0            6/30/2014            7/31/2014            8/31/2014   31.277   
1            6/30/2014            7/31/2014            8/31/2014    0.000   
2            6/30/2014            7/31/2014            8/31/2014   60.806   
3            6/30/2014            7/31/2014            8/31/2014  156.362   
4            6/30/2014            7/31/2014            8/31/2014  240.708   

    arpu_7  ...  sachet_3g_7  sachet_3g_8  fb_use

In [8]:
# Display basic information about the data (index range, column count,
# dtype summary and memory usage).
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would append a stray "None" line.
print("\nData Info:")
train_data.info()


Data Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11652 entries, 0 to 11651
Columns: 172 entries, id to churn_probability
dtypes: float64(161), int64(2), object(9)
memory usage: 15.3+ MB
None


In [9]:
# Summary statistics (count, mean, std, min, quartiles, max) of the training data.
print("\nDescriptive statistics for numerical columns:")
numeric_summary = train_data.describe()
print(numeric_summary)


Descriptive statistics for numerical columns:
                 id  circle_id  loc_og_t2o_mou  std_og_t2o_mou  \
count  11652.000000    11652.0         11529.0         11529.0   
mean    5825.500000      109.0             0.0             0.0   
std     3363.787003        0.0             0.0             0.0   
min        0.000000      109.0             0.0             0.0   
25%     2912.750000      109.0             0.0             0.0   
50%     5825.500000      109.0             0.0             0.0   
75%     8738.250000      109.0             0.0             0.0   
max    11651.000000      109.0             0.0             0.0   

       loc_ic_t2o_mou        arpu_6        arpu_7        arpu_8   onnet_mou_6  \
count         11529.0  11652.000000  11652.000000  11652.000000  11181.000000   
mean              0.0    278.645233    272.108095    274.089862    129.351097   
std               0.0    303.971079    285.330325    293.805635    283.783235   
min               0.0  -2041.22800

In [10]:
# Report how many distinct (non-null) values each column holds; columns with a
# single distinct value carry no information.
print("\nUnique values in each column:")
for col, series in train_data.items():
    distinct_count = series.nunique()
    print(f"{col}: {distinct_count}")



Unique values in each column:
id: 11652
circle_id: 1
loc_og_t2o_mou: 1
std_og_t2o_mou: 1
loc_ic_t2o_mou: 1
last_date_of_month_6: 1
last_date_of_month_7: 1
last_date_of_month_8: 1
arpu_6: 10913
arpu_7: 10890
arpu_8: 10680
onnet_mou_6: 6557
onnet_mou_7: 6471
onnet_mou_8: 6340
offnet_mou_6: 8457
offnet_mou_7: 8426
offnet_mou_8: 8227
roam_ic_mou_6: 1604
roam_ic_mou_7: 1275
roam_ic_mou_8: 1195
roam_og_mou_6: 1842
roam_og_mou_7: 1428
roam_og_mou_8: 1376
loc_og_t2t_mou_6: 4495
loc_og_t2t_mou_7: 4470
loc_og_t2t_mou_8: 4427
loc_og_t2m_mou_6: 6643
loc_og_t2m_mou_7: 6589
loc_og_t2m_mou_8: 6520
loc_og_t2f_mou_6: 1456
loc_og_t2f_mou_7: 1470
loc_og_t2f_mou_8: 1460
loc_og_t2c_mou_6: 878
loc_og_t2c_mou_7: 970
loc_og_t2c_mou_8: 981
loc_og_mou_6: 7518
loc_og_mou_7: 7489
loc_og_mou_8: 7388
std_og_t2t_mou_6: 3834
std_og_t2t_mou_7: 3847
std_og_t2t_mou_8: 3799
std_og_t2m_mou_6: 4697
std_og_t2m_mou_7: 4705
std_og_t2m_mou_8: 4622
std_og_t2f_mou_6: 723
std_og_t2f_mou_7: 678
std_og_t2f_mou_8: 682
std_og_t2c_mo