# Deployment

- Cargamos el modelo
- Cargamos los datos futuros
- Preparamos los datos futuros
- Aplicamos el modelo para la predicción

In [75]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import joblib
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import FunctionTransformer, MinMaxScaler, OneHotEncoder, LabelEncoder

In [76]:
# Step 1: clean outliers
def remove_outliers(X):
    """Replace out-of-range or unexpected values with NaN.

    The function works on a copy, so the DataFrame passed in by the caller
    is never modified.

    NOTE(review): this function is never called directly in the notebook;
    presumably the pickled pipeline references it by name, so its name and
    signature must stay stable -- confirm against the training notebook.

    Parameters
    ----------
    X : pandas.DataFrame
        Must contain the columns 'Age', 'Work/Study Hours',
        'Working Professional or Student', 'Dietary Habits',
        'Have you ever had suicidal thoughts ?', 'Financial Stress',
        'Job/Study Satisfaction' and 'Work/Academic Pressure'.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same columns in which every value outside the
        accepted range / set of categories has been replaced by NaN.
    """
    # Never touch the caller's frame: the original version wrote NaNs
    # straight into the input object.
    X = X.copy()

    # Numeric variables: anything outside the closed range [low, high]
    # becomes NaN. `mask` is used instead of `.loc[...] = np.nan` so that
    # integer columns are upcast cleanly rather than through item assignment.
    numeric_ranges = {
        'Age': (18, 60),
        'Work/Study Hours': (0, 12),
    }
    for column, (low, high) in numeric_ranges.items():
        out_of_range = (X[column] < low) | (X[column] > high)
        X[column] = X[column].mask(out_of_range)

    # Categorical / ordinal variables: anything outside the expected set of
    # values becomes NaN.
    allowed_values = {
        'Working Professional or Student': ['Working Professional', 'Student'],
        'Dietary Habits': ['Unhealthy', 'Healthy', 'Moderate'],
        'Have you ever had suicidal thoughts ?': ['Yes', 'No'],
        'Financial Stress': [1, 2, 3, 4, 5],
        'Job/Study Satisfaction': [1.0, 2.0, 3.0, 4.0, 5.0],
        'Work/Academic Pressure': [1.0, 2.0, 3.0, 4.0, 5.0],
    }
    for column, allowed in allowed_values.items():
        X[column] = X[column].apply(
            lambda x, allowed=allowed: x if x in allowed else np.nan
        )

    # Original author's note: validate all variables. Columns not listed
    # above are passed through unchecked.
    return X

In [77]:
# Load the pipeline that contains the model.
# NOTE: unpickling can execute arbitrary code -- only load files from a
# trusted source.
import pickle
filename = 'pipeline_Logistic_Regression_model.pkl'
# Use a context manager so the file handle is closed even if loading fails.
with open(filename, 'rb') as model_file:
    pipeline = pickle.load(model_file)
pipeline

In [78]:
# Read the future (unseen) data and preview its first rows
data = pd.read_csv(filepath_or_buffer="datos_futuros.csv")
data.head(n=5)

Unnamed: 0,Name,Gender,Age,City,Working Professional or Student,Profession,Academic Pressure,Work Pressure,CGPA,Study Satisfaction,Job Satisfaction,Sleep Duration,Dietary Habits,Degree,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress,Family History of Mental Illness
0,Abhinav,Male,39,Indore,Working Professional,Chemist,,5.0,,,4.0,More than 8 hours,Unhealthy,B.Pharm,No,4,3,No
1,Rupak,Male,23,Hyderabad,Working Professional,Entrepreneur,,3.0,,,3.0,7-8 hours,Unhealthy,B.Tech,Yes,9,5,No
2,Chirag,Male,26,Jaipur,Student,,4.0,,9.56,5.0,,Less than 5 hours,Unhealthy,B.Ed,No,4,1,Yes
3,Ansh,Male,49,Srinagar,Working Professional,Teacher,,1.0,,,1.0,5-6 hours,Moderate,B.Ed,Yes,3,4,Yes
4,Arya,Male,28,Kalyan,Student,,1.0,,7.39,1.0,,More than 8 hours,Unhealthy,BCA,Yes,8,2,No


In [79]:
# Columns that must be stored with the pandas 'category' dtype
columns_to_category = [
    'Name',
    'Gender',
    'City',
    'Working Professional or Student',
    'Profession',
    'Academic Pressure',
    'Work Pressure',
    'Study Satisfaction',
    'Job Satisfaction',
    'Financial Stress',
    'Sleep Duration',
    'Dietary Habits',
    'Degree',
    'Have you ever had suicidal thoughts ?',
    'Family History of Mental Illness',
]

# Cast all of them in a single call instead of one column at a time
data = data.astype({name: 'category' for name in columns_to_category})

data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12 entries, 0 to 11
Data columns (total 18 columns):
 #   Column                                 Non-Null Count  Dtype   
---  ------                                 --------------  -----   
 0   Name                                   12 non-null     category
 1   Gender                                 12 non-null     category
 2   Age                                    12 non-null     int64   
 3   City                                   12 non-null     category
 4   Working Professional or Student        12 non-null     category
 5   Profession                             7 non-null      category
 6   Academic Pressure                      3 non-null      category
 7   Work Pressure                          9 non-null      category
 8   CGPA                                   3 non-null      float64 
 9   Study Satisfaction                     3 non-null      category
 10  Job Satisfaction                       9 non-null      category


In [80]:
# The four rating columns share the same 1-5 scale; give them identical
# category sets so they can be combined later.
common_categories = [1.0, 2.0, 3.0, 4.0, 5.0]

for rating_column in ('Job Satisfaction', 'Study Satisfaction',
                      'Academic Pressure', 'Work Pressure'):
    data[rating_column] = data[rating_column].cat.set_categories(common_categories)

In [81]:
# Build the combined features: each new column takes the first source
# column and fills its missing values from the second one.
merged_features = {
    'Job/Study Satisfaction': ('Job Satisfaction', 'Study Satisfaction'),
    'Work/Academic Pressure': ('Academic Pressure', 'Work Pressure'),
}
for target, (primary, fallback) in merged_features.items():
    data[target] = data[primary].fillna(data[fallback])

In [82]:
# Drop the columns that are not needed from here on
columns_to_drop = [
    'CGPA',
    'Name',
    'Profession',
    'City',
    'Family History of Mental Illness',
    'Gender',
    'Sleep Duration',
    'Job Satisfaction',
    'Study Satisfaction',
    'Academic Pressure',
    'Work Pressure',
]
data = data.drop(columns_to_drop, axis=1)

data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12 entries, 0 to 11
Data columns (total 9 columns):
 #   Column                                 Non-Null Count  Dtype   
---  ------                                 --------------  -----   
 0   Age                                    12 non-null     int64   
 1   Working Professional or Student        12 non-null     category
 2   Dietary Habits                         12 non-null     category
 3   Degree                                 12 non-null     category
 4   Have you ever had suicidal thoughts ?  12 non-null     category
 5   Work/Study Hours                       12 non-null     int64   
 6   Financial Stress                       12 non-null     category
 7   Job/Study Satisfaction                 12 non-null     category
 8   Work/Academic Pressure                 12 non-null     category
dtypes: category(7), int64(2)
memory usage: 1.8 KB


In [84]:
# Display the prepared frame as it stands after the column clean-up
data

Unnamed: 0,Age,Working Professional or Student,Dietary Habits,Degree,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress,Job/Study Satisfaction,Work/Academic Pressure
0,39,Working Professional,Unhealthy,B.Pharm,No,4,3,4.0,5.0
1,23,Working Professional,Unhealthy,B.Tech,Yes,9,5,3.0,3.0
2,26,Student,Unhealthy,B.Ed,No,4,1,5.0,4.0
3,49,Working Professional,Moderate,B.Ed,Yes,3,4,1.0,1.0
4,28,Student,Unhealthy,BCA,Yes,8,2,1.0,1.0
5,20,Working Professional,Moderate,Class 12,Yes,0,2,2.0,5.0
6,27,Working Professional,Healthy,B.Com,No,9,2,1.0,3.0
7,42,Working Professional,Unhealthy,BE,No,3,4,5.0,4.0
8,48,Working Professional,Unhealthy,MHM,No,12,2,5.0,5.0
9,57,Working Professional,Moderate,LLB,Yes,7,5,2.0,2.0


In [85]:
# Predict with the loaded logistic-regression pipeline (the original comment
# said "Tree", which does not match the model file that was loaded).
# A copy is passed so that any in-place changes made by the pipeline's
# preprocessing steps cannot alter `data` itself.
Y_LR = pipeline.predict(data.copy())

In [86]:
# Attach the predicted label to each row and show the result
prediction_column = 'Depression Prediction'
data[prediction_column] = Y_LR
data

Unnamed: 0,Age,Working Professional or Student,Dietary Habits,Degree,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress,Job/Study Satisfaction,Work/Academic Pressure,Depression Prediction
0,39.0,Working Professional,Unhealthy,B.Pharm,No,4.0,3,4.0,5.0,No
1,23.0,Working Professional,Unhealthy,B.Tech,Yes,9.0,5,3.0,3.0,Yes
2,26.0,Student,Unhealthy,B.Ed,No,4.0,1,5.0,4.0,No
3,49.0,Working Professional,Moderate,B.Ed,Yes,3.0,4,1.0,1.0,No
4,28.0,Student,Unhealthy,BCA,Yes,8.0,2,1.0,1.0,Yes
5,20.0,Working Professional,Moderate,Class 12,Yes,0.0,2,2.0,5.0,Yes
6,27.0,Working Professional,Healthy,B.Com,No,9.0,2,1.0,3.0,No
7,42.0,Working Professional,Unhealthy,BE,No,3.0,4,5.0,4.0,No
8,48.0,Working Professional,Unhealthy,MHM,No,12.0,2,5.0,5.0,No
9,57.0,Working Professional,Moderate,LLB,Yes,7.0,5,2.0,2.0,No
