In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import r2_score, mean_squared_error, accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, roc_curve, confusion_matrix
from sklearn.preprocessing import StandardScaler

Part 1: Data Preprocessing

1. Load the Dataset

In [None]:
# Load the raw Netflix user-base export; the path is relative to the
# notebook's working directory.
df = pd.read_csv(filepath_or_buffer="Netflix_Userbase.csv")


2. Handling Missing Values

In [None]:
# Report how many missing entries each column has. This cell only inspects
# the data; nothing is filled or dropped here.
print(df.isna().sum())

3. Encode Categorical Variables

In [None]:
# One-hot encode the non-numeric columns of the raw frame; drop_first=True
# omits the first level of each encoded column.
df_dummies = pd.get_dummies(df, drop_first=True)

# Overwrite the revenue column in the encoded frame with a 0/1 indicator:
# 1 where the raw monthly revenue is greater than 10, otherwise 0.
# The raw values remain available in df['Monthly Revenue'].
df_dummies['Monthly Revenue'] = df['Monthly Revenue'].gt(10).astype(int)

4. Feature Selection

In [None]:
# Feature matrix: every encoded column except the target and 'Age'.
# NOTE(review): 'Age' is excluded here but kept in X_logistic later in the
# notebook -- confirm that the difference is intentional.
X = df_dummies.drop(columns=['Monthly Revenue', 'Age'])

# Regression target: the actual monthly revenue from the raw frame.
# df_dummies['Monthly Revenue'] was overwritten with a 0/1 (> 10) indicator
# during encoding, so reading the target from there would make the linear
# model in Part 3 regress on a binary flag instead of revenue. df and
# df_dummies share the same index, so rows stay aligned with X.
# NOTE(review): assumes 'Monthly Revenue' has no missing values -- check the
# isnull() report above before fitting.
y = df['Monthly Revenue']

Part 2: Exploratory Data Analysis (EDA)

1. Descriptive Statistics

In [None]:
# Summary statistics (count, mean, std, quartiles, ...) for the feature matrix.
feature_summary = X.describe()
print(feature_summary)


2. Visualizations

In [None]:
# Histogram (with KDE overlay) of each raw numeric column of interest.
# Both plots read from the raw frame `df`: the 'Monthly Revenue' column in
# df_dummies was replaced by a 0/1 indicator during encoding, so plotting it
# would show two bars rather than the revenue distribution the title promises.
for column in ('Monthly Revenue', 'Age'):
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.histplot(df[column], bins=20, kde=True, ax=ax)
    ax.set(title=f'Distribution of {column}', xlabel=column, ylabel='Count')
    plt.show()

Part 3: Linear Regression Model (Predicting Monthly Revenue)

1. Build the Model

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Ordinary least-squares fit on the training portion. The fit call is left
# as the last expression so the notebook displays the fitted estimator.
lr_model = LinearRegression()
lr_model.fit(X=X_train, y=y_train)

In [None]:
# Predictions of the fitted linear model on the held-out rows.
y_pred = lr_model.predict(X=X_test)


2. Model Evaluation

In [None]:
# Score the held-out predictions: R^2, and RMSE derived from the MSE.
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

print("Linear Regression Model Evaluation:")
for label, value in (
    ("Root Mean Squared Error (RMSE):", rmse),
    ("R-squared (R2):", r2),
):
    print(label, value)

Part 4: Logistic Regression Model (Predicting Customer Feedback)

1. Model Building

In [None]:
# Derive a binary 'Feedback' label: 1 where the row's 'Monthly Revenue' value
# in df_dummies is above that column's mean, otherwise 0.
revenue_col = df_dummies['Monthly Revenue']
df_dummies['Feedback'] = revenue_col.gt(revenue_col.mean()).astype(int)

# Target is the derived label; features are every other column except the
# revenue column the label was derived from.
y_logistic = df_dummies['Feedback']
X_logistic = df_dummies.drop(columns=['Monthly Revenue', 'Feedback'])


In [None]:
# 80/20 train/test split for the classification task, using the same seed as
# the regression split.
(
    X_train_logistic,
    X_test_logistic,
    y_train_logistic,
    y_test_logistic,
) = train_test_split(
    X_logistic,
    y_logistic,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Fit a logistic-regression classifier on the training split.
# max_iter is raised above scikit-learn's default of 100: the features are
# passed in unscaled (no scaler is applied anywhere before this cell), and on
# unscaled inputs the default solver can hit the iteration cap and stop before
# converging. A higher cap does not change a fit that already converged.
# NOTE(review): standardising the features would be the more thorough remedy,
# but it would also require transforming X_test_logistic before predicting.
logistic_model = LogisticRegression(max_iter=1000)
logistic_model.fit(X_train_logistic, y_train_logistic)

In [None]:
# Hard class predictions (0/1) of the fitted classifier on the held-out rows.
y_pred_logistic = logistic_model.predict(X=X_test_logistic)

2. Model Evaluation

In [None]:
# Classification metrics on the held-out split; each call compares the true
# labels against the hard predictions.
conf_matrix = confusion_matrix(y_test_logistic, y_pred_logistic)
accuracy = accuracy_score(y_test_logistic, y_pred_logistic)
precision = precision_score(y_test_logistic, y_pred_logistic)
recall = recall_score(y_test_logistic, y_pred_logistic)
f1 = f1_score(y_test_logistic, y_pred_logistic)

print("Logistic Regression Model Evaluation:")
for label, value in (
    ("Accuracy:", accuracy),
    ("Precision:", precision),
    ("Recall:", recall),
    ("F1 Score:", f1),
):
    print(label, value)
print("Confusion Matrix:")
print(conf_matrix)