In [1]:
import pandas as pd
from matplotlib import pyplot as plt
%matplotlib inline

In [2]:
# Load the raw customer records; the path is relative to the notebook's
# working directory.
df = pd.read_csv("risk_test.csv")
# Preview the first rows to check the columns that were read.
df.head()

Unnamed: 0,customer_id,age,gender,product,subscription_age,num_cases,subscription_status
0,C2450,58,female,prd_1,742,1,inactive
1,C2465,40,female,prd_1,1825,1,active
2,C2468,64,male,prd_1,1825,1,active
3,C2476,55,male,prd_1,1825,1,active
4,C2478,30,female,prd_2,96,1,inactive


In [3]:
# Columns carried forward for modelling: the candidate features plus the
# subscription_status label. customer_id is not selected.
model_columns = [
    'age',
    'gender',
    'product',
    'subscription_age',
    'num_cases',
    'subscription_status',
]
subdf = df[model_columns]
subdf.head()

Unnamed: 0,age,gender,product,subscription_age,num_cases,subscription_status
0,58,female,prd_1,742,1,inactive
1,40,female,prd_1,1825,1,active
2,64,male,prd_1,1825,1,active
3,55,male,prd_1,1825,1,active
4,30,female,prd_2,96,1,inactive


In [4]:
# One-hot encode the three categorical columns, then discard the
# "inactive" indicator so that subscription_status_active is the only
# remaining column derived from subscription_status.
subdf = (
    pd.get_dummies(
        data=subdf,
        columns=['gender', 'product', 'subscription_status'],
    )
    .drop(columns='subscription_status_inactive')
)

In [5]:
# Feature matrix: the numeric columns plus the gender/product indicator
# columns. The target indicator (subscription_status_active) is left out.
feature_columns = [
    'age',
    'subscription_age',
    'num_cases',
    'gender_female',
    'gender_male',
    'product_prd_1',
    'product_prd_2',
]
X = subdf[feature_columns]

In [6]:
# Preview the feature matrix that will be passed to the model.
X.head()

Unnamed: 0,age,subscription_age,num_cases,gender_female,gender_male,product_prd_1,product_prd_2
0,58,742,1,1,0,1,0
1,40,1825,1,1,0,1,0
2,64,1825,1,0,1,1,0
3,55,1825,1,0,1,1,0
4,30,96,1,1,0,0,1


In [7]:
# Target vector: the indicator column for subscription_status == "active".
y = subdf['subscription_status_active']

In [8]:
from sklearn.model_selection import train_test_split

# Fixed seed so the split (and every metric derived from it) is identical
# after Restart Kernel -> Run All. Outputs recorded before the seed was added
# came from an unseeded split and may differ slightly from a fresh run.
SPLIT_SEED = 42

# stratify=y keeps the active/inactive ratio the same in both partitions,
# which matters because the classes are imbalanced.
# NOTE(review): train_size=0.3 trains on 30% of the rows and evaluates on the
# remaining 70%. That is kept as written; confirm whether test_size=0.3 was
# the intent.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.3,
    random_state=SPLIT_SEED,
    stratify=y,
)

In [9]:
from sklearn.linear_model import LogisticRegression
# Logistic-regression classifier created with the library's default
# hyperparameters (no arguments are passed).
model = LogisticRegression()

In [10]:
# Fit on the training partition. fit() returns the estimator itself, so the
# cell output is the estimator's repr.
model.fit(X_train, y_train)

LogisticRegression()

In [11]:
# Hard class predictions for the held-out rows; reused by the confusion
# matrix and classification report cells.
ypred = model.predict(X_test)

In [12]:
# Accuracy on the held-out partition.
# NOTE(review): in the recorded run this value equals the share of class-1
# rows in the test set (277546 / 356253), i.e. the score of a constant
# "always predict 1" classifier, so accuracy alone is not informative here.
model.score(X_test,y_test)

0.7790699306391806

In [13]:
from sklearn.metrics import confusion_matrix

In [14]:
# Confusion matrix with true labels as rows and predicted labels as columns.
# In the recorded output the first column is all zeros: the model predicted
# class 1 for every test row.
print(confusion_matrix(y_test, ypred))

[[     0  78707]
 [     0 277546]]


In [15]:
from sklearn.metrics import classification_report

In [16]:
# Per-class precision / recall / F1 on the held-out rows.
# The warning in the recorded output comes from scikit-learn's metric code:
# class 0 is never predicted, so its precision is undefined and is reported
# as 0.00.
print(classification_report(y_test, ypred))

  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00     78707
           1       0.78      1.00      0.88    277546

    accuracy                           0.78    356253
   macro avg       0.39      0.50      0.44    356253
weighted avg       0.61      0.78      0.68    356253



In [17]:
# Load the customers to be scored.
# NOTE(review): this reads the same file ("risk_test.csv") that was loaded
# into df earlier, so the rows being scored include the rows the model was
# trained on. Confirm whether a separate scoring file was intended.
customers_df = pd.read_csv("risk_test.csv")
customers_df.head()

Unnamed: 0,customer_id,age,gender,product,subscription_age,num_cases,subscription_status
0,C2450,58,female,prd_1,742,1,inactive
1,C2465,40,female,prd_1,1825,1,active
2,C2468,64,male,prd_1,1825,1,active
3,C2476,55,male,prd_1,1825,1,active
4,C2478,30,female,prd_2,96,1,inactive


In [18]:
# Same column selection as used for the training frame; customer_id is left
# out here and picked up again when the scored output frame is built.
scoring_columns = [
    'age',
    'gender',
    'product',
    'subscription_age',
    'num_cases',
    'subscription_status',
]
customers_subdf = customers_df[scoring_columns]
customers_subdf.head()

Unnamed: 0,age,gender,product,subscription_age,num_cases,subscription_status
0,58,female,prd_1,742,1,inactive
1,40,female,prd_1,1825,1,active
2,64,male,prd_1,1825,1,active
3,55,male,prd_1,1825,1,active
4,30,female,prd_2,96,1,inactive


In [19]:
# One-hot encode only the feature categoricals. subscription_status is left
# as text here because it is not a model input (it is dropped in a later
# cell).
categorical_features = ['gender', 'product']
customers_subdf = pd.get_dummies(
    data=customers_subdf,
    columns=categorical_features,
)
customers_subdf.head()

Unnamed: 0,age,subscription_age,num_cases,subscription_status,gender_female,gender_male,product_prd_1,product_prd_2
0,58,742,1,inactive,1,0,1,0
1,40,1825,1,active,1,0,1,0
2,64,1825,1,active,0,1,1,0
3,55,1825,1,active,0,1,1,0
4,30,96,1,inactive,1,0,0,1


In [20]:
# Remove the label column so only the model's input columns remain.
customers_subdf = customers_subdf.drop(columns=['subscription_status'])

In [21]:
# Preview the scoring frame; its columns should line up with the feature
# matrix X the model was fitted on.
customers_subdf.head()

Unnamed: 0,age,subscription_age,num_cases,gender_female,gender_male,product_prd_1,product_prd_2
0,58,742,1,1,0,1,0
1,40,1825,1,1,0,1,0
2,64,1825,1,0,1,1,0
3,55,1825,1,0,1,1,0
4,30,96,1,1,0,0,1


In [22]:
# predict_proba returns one column per class; column index 1 is the
# probability of the second class, i.e. subscription_status_active == 1.
# NOTE(review): despite the name, a higher risk_score therefore means "more
# likely to be active" (it is later stored as prob_stay_active).
class_probabilities = model.predict_proba(customers_subdf)
risk_score = class_probabilities[:, 1]

In [23]:
# Show the array of predicted probabilities (one value per scored customer).
risk_score

array([0.80063271, 0.92565011, 0.92735826, ..., 0.70001079, 0.69777053,
       0.70138093])

In [25]:
# Output frame: the customer identifier plus the raw (un-encoded) attributes.
# .copy() makes this an independent DataFrame, so adding a column to it in a
# later cell does not write into a selection of customers_df (which is what
# triggers pandas' SettingWithCopyWarning).
output_columns = [
    'customer_id',
    'age',
    'gender',
    'product',
    'subscription_age',
    'num_cases',
]
customers_with_score = customers_df[output_columns].copy()
customers_with_score.head()

Unnamed: 0,customer_id,age,gender,product,subscription_age,num_cases
0,C2450,58,female,prd_1,742,1
1,C2465,40,female,prd_1,1825,1
2,C2468,64,male,prd_1,1825,1
3,C2476,55,male,prd_1,1825,1
4,C2478,30,female,prd_2,96,1


In [26]:
# Attach the predicted probability of the "active" class, positionally
# aligned with the rows of customers_df that were scored.
customers_with_score['prob_stay_active'] = risk_score

In [27]:
# Preview the scored customers with the new prob_stay_active column.
customers_with_score.head()

Unnamed: 0,customer_id,age,gender,product,subscription_age,num_cases,prob_stay_active
0,C2450,58,female,prd_1,742,1,0.800633
1,C2465,40,female,prd_1,1825,1,0.92565
2,C2468,64,male,prd_1,1825,1,0.927358
3,C2476,55,male,prd_1,1825,1,0.926941
4,C2478,30,female,prd_2,96,1,0.694698


In [28]:
# Display the whole scored frame as the cell output; the recorded output is
# truncated to the first and last rows.
customers_with_score

Unnamed: 0,customer_id,age,gender,product,subscription_age,num_cases,prob_stay_active
0,C2450,58,female,prd_1,742,1,0.800633
1,C2465,40,female,prd_1,1825,1,0.925650
2,C2468,64,male,prd_1,1825,1,0.927358
3,C2476,55,male,prd_1,1825,1,0.926941
4,C2478,30,female,prd_2,96,1,0.694698
...,...,...,...,...,...,...,...
508927,C511350,57,male,prd_2,1,0,0.699940
508928,C511364,68,male,prd_1,1,0,0.672855
508929,C511367,70,female,prd_2,1,0,0.700011
508930,C511368,42,male,prd_2,1,0,0.697771


In [29]:
# Persist the scored customers. index=True is the pandas default, spelled out
# here: the positional row index is written as an unnamed first column.
customers_with_score.to_csv(
    path_or_buf='customers_with_score.csv',
    index=True,
)