In [1]:
import pandas as pd
import numpy as np
from scipy import stats
from sklearn.preprocessing import StandardScaler

# Create a dummy dataset
np.random.seed(0)
dummy_data = {
    'Feature1': np.random.normal(100, 10, 99).tolist() + [400, np.nan, np.nan],  # Normally distributed with an outlier
    'Feature2': np.random.randint(0, 100, 102).tolist(),  # Random integers
    'Category': ['A', 'B', 'C', 'D'] * 25 + [np.nan, 'A'],  # Categorical with some missing values
    'Target': np.random.choice([0, 1], 102).tolist()  # Binary target variable
}

# Convert the dictionary to a pandas DataFrame
df_dummy = pd.DataFrame(dummy_data)

# Display the first few rows of the dummy dataset
print(df_dummy.head())

     Feature1  Feature2 Category  Target
0  117.640523        32        A       1
1  104.001572        70        B       1
2  109.787380        85        C       0
3  122.408932        31        D       1
4  118.675580        13        A       0


In [2]:
def load_data(df):
    return df

def handle_missing_values(df):
    return df.fillna(df.mean())  # For numeric data, fill missing values with the mean

def remove_outliers(df, column_name):
    z_scores = np.abs(stats.zscore(df.select_dtypes(include=[np.number])))
    return df[(z_scores < 3).all(axis=1)]  # Remove rows with any outliers
    # Q1 = df[column_name].quantile(0.25)
    # Q3 = df[column_name].quantile(0.75)
    # IQR = Q3 - Q1
    # df = df[~((df[column_name] < (Q1 - 1.5 * IQR)) | (df[column_name] > (Q3 + 1.5 * IQR)))]
    # return df

def scale_data(df, column_name):
    scaler = StandardScaler()
    df[df.select_dtypes(include=[np.number]).columns] = scaler.fit_transform(df.select_dtypes(include=[np.number]))
    # m = df[column_name].mean()
    # s = df[column_name].std()
    # df[column_name] = (df[column_name] - m) / s    
    return df

def encode_categorical(df, categorical_columns):
    return pd.get_dummies(df, columns=categorical_columns)

def save_data(df, output_filepath):
    df.to_csv(output_filepath, index=False)

In [3]:
# Load the data
df_preprocessed = load_data(df_dummy)

# Encode categorical variables
df_preprocessed = encode_categorical(df_preprocessed, ['Category'])

# Handle missing values
df_preprocessed = handle_missing_values(df_preprocessed)

# Remove outliers
df_preprocessed = remove_outliers(df_preprocessed, "Feature1")

# Scale the data
df_preprocessed = scale_data(df_preprocessed, "Feature1")
# df_preprocessed = scale_data(df_preprocessed, "Feature2")


# Display the preprocessed data
print(df_preprocessed.head(7))
print(df_preprocessed.tail(7))

   Feature1  Feature2    Target  Category_A  Category_B  Category_C  \
0  1.696404 -0.513627  0.951662        True       False       False   
1  0.336810  0.888436  0.951662       False        True       False   
2  0.913566  1.441882 -1.050793       False       False        True   
3  2.171740 -0.550524  0.951662       False       False       False   
4  1.799583 -1.214659 -1.050793        True       False       False   
5 -1.036280  0.925333  0.951662       False        True       False   
6  0.885007  0.371886  0.951662       False       False        True   

   Category_D  
0       False  
1       False  
2       False  
3        True  
4       False  
5       False  
6       False  
     Feature1  Feature2    Target  Category_A  Category_B  Category_C  \
94   0.293158  0.076715  0.951662       False       False        True   
95   0.642260 -0.771902 -1.050793       False       False       False   
96  -0.051618  0.777747 -1.050793        True       False       False   
97   1.7181

In [4]:
# Save the cleaned and preprocessed DataFrame to a CSV file
save_data(df_preprocessed, 'preprocessed_dummy_data.csv')

print('Preprocessing complete. Preprocessed data saved as preprocessed_dummy_data.csv')

Preprocessing complete. Preprocessed data saved as preprocessed_dummy_data.csv


In [6]:
print(df_preprocessed.isnull().sum())

print(df_preprocessed.describe())

print(df_preprocessed.head())

print(df_preprocessed.columns)

Feature1      0
Feature2      0
Target        0
Category_A    0
Category_B    0
Category_C    0
Category_D    0
dtype: int64
           Feature1      Feature2        Target
count  1.010000e+02  1.010000e+02  1.010000e+02
mean   1.154192e-16 -2.198461e-18 -5.496154e-17
std    1.004988e+00  1.004988e+00  1.004988e+00
min   -2.607022e+00 -1.694312e+00 -1.050793e+00
25%   -6.944060e-01 -6.981091e-01 -1.050793e+00
50%    5.920661e-02 -1.815595e-01  9.516619e-01
75%    6.647063e-01  8.515397e-01  9.516619e-01
max    2.200511e+00  1.884639e+00  9.516619e-01
   Feature1  Feature2    Target  Category_A  Category_B  Category_C  \
0  1.696404 -0.513627  0.951662        True       False       False   
1  0.336810  0.888436  0.951662       False        True       False   
2  0.913566  1.441882 -1.050793       False       False        True   
3  2.171740 -0.550524  0.951662       False       False       False   
4  1.799583 -1.214659 -1.050793        True       False       False   

   Category_D  


### next

In [90]:
import torch, os
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [108]:
data = pd.read_csv('data.csv')
data.drop(axis=1, columns=["Contract", "PaymentMethod","CustomerID"], inplace=True)
print(data.head())
print(data.info())

data = data.dropna()  # Simple example of dropping missing values
data = pd.get_dummies(data, drop_first=True)

X = data.drop('Churn', axis=1)
y = data['Churn']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=int.from_bytes(os.urandom(4)))

   Tenure  MonthlyCharges  TotalCharges  Churn
0       5            70.0         350.0      1
1      10            85.5         850.5      0
2       3            55.3         165.9      1
3       8            90.0         720.0      0
4       2            65.2         130.4      1
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 4 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Tenure          5 non-null      int64  
 1   MonthlyCharges  5 non-null      float64
 2   TotalCharges    5 non-null      float64
 3   Churn           5 non-null      int64  
dtypes: float64(2), int64(2)
memory usage: 292.0 bytes
None


In [137]:
class ChurnModel(nn.Module):
    def __init__(self):
        super(ChurnModel, self).__init__()
        self.fc1 = nn.Linear(X_train.shape[1], 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = nn.functional.dropout(x, 0.5, training=self.training)
        x = torch.relu(self.fc2(x))
        x = torch.sigmoid(self.fc3(x))
        return x

model = ChurnModel()

In [135]:
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop (simplified example)
for epoch in range(10):
    model.train()
    optimizer.zero_grad()
    outputs = model(torch.tensor(X_train.values).float())
    loss = criterion(outputs.squeeze(), torch.tensor(y_train.values).float())
    loss.backward()
    optimizer.step()

In [None]:
model.eval()
outputs = model(torch.tensor(X_test.values).float())
predictions = (outputs.squeeze().detach().numpy() > 0.5).astype(int)
accuracy = np.mean(predictions == y_test.values)
print(f'Test accuracy: {accuracy}')

Test accuracy: 0.0


In [None]:
# Apply dynamic quantization
quantized_model = torch.quantization.quantize_dynamic(
    model, {torch.nn.Linear}, dtype=torch.qint8
)
torch.save(model.state_dict(), 'churn_model.pth')

### Docker

In [45]:
from flask import Flask, request, jsonify, Response, flash, redirect
import joblib, json

app = Flask(__name__)
# model = joblib.load('model.pkl')

@app.route('/predict', methods=['GET'])
def predict():
    for key in request.args.keys():
        print(key + " : " + request.args[key])
    return "<html><title>aaa</title><input>: data.text()}</input></html>"
    # return "", 201
    # return redirect(url_for('http://google.com'))


if __name__ == '__main__':
    app.run(host='0.0.0.0', port=800)

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:800
 * Running on http://192.168.1.21:800
Press CTRL+C to quit
127.0.0.1 - - [06/Jun/2025 19:37:09] "GET /predict?data=1&beta=2 HTTP/1.1" 200 -


data : 1
beta : 2


### deployment

In [2]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import joblib

# Load the dataset
iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.2, random_state=42)

# Train the model
model = LogisticRegression(max_iter=200)
model.fit(X_train, y_train)

# Save the model to a file
joblib.dump(model, 'iris_model.pkl')

['iris_model.pkl']

In [3]:
iris

{'data': array([[5.1, 3.5, 1.4, 0.2],
        [4.9, 3. , 1.4, 0.2],
        [4.7, 3.2, 1.3, 0.2],
        [4.6, 3.1, 1.5, 0.2],
        [5. , 3.6, 1.4, 0.2],
        [5.4, 3.9, 1.7, 0.4],
        [4.6, 3.4, 1.4, 0.3],
        [5. , 3.4, 1.5, 0.2],
        [4.4, 2.9, 1.4, 0.2],
        [4.9, 3.1, 1.5, 0.1],
        [5.4, 3.7, 1.5, 0.2],
        [4.8, 3.4, 1.6, 0.2],
        [4.8, 3. , 1.4, 0.1],
        [4.3, 3. , 1.1, 0.1],
        [5.8, 4. , 1.2, 0.2],
        [5.7, 4.4, 1.5, 0.4],
        [5.4, 3.9, 1.3, 0.4],
        [5.1, 3.5, 1.4, 0.3],
        [5.7, 3.8, 1.7, 0.3],
        [5.1, 3.8, 1.5, 0.3],
        [5.4, 3.4, 1.7, 0.2],
        [5.1, 3.7, 1.5, 0.4],
        [4.6, 3.6, 1. , 0.2],
        [5.1, 3.3, 1.7, 0.5],
        [4.8, 3.4, 1.9, 0.2],
        [5. , 3. , 1.6, 0.2],
        [5. , 3.4, 1.6, 0.4],
        [5.2, 3.5, 1.5, 0.2],
        [5.2, 3.4, 1.4, 0.2],
        [4.7, 3.2, 1.6, 0.2],
        [4.8, 3.1, 1.6, 0.2],
        [5.4, 3.4, 1.5, 0.4],
        [5.2, 4.1, 1.5, 0.1],
  

In [None]:
from flask import Flask, request, jsonify
import numpy as np
import json

app = Flask(__name__)

# Load the model
model = joblib.load('iris_model.pkl')

@app.route('/predict', methods=['GET'])
def predict():
    data = request.args["data"]
    data = np.array(json.loads(data)).reshape(-1,4)
    prediction = model.predict(data)
    return "<html><title>aaa</title><h3>Model outcome: <i>" + str(prediction) + "</i></h3></html>"

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=80)

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:80
 * Running on http://192.168.1.21:80
Press CTRL+C to quit
127.0.0.1 - - [09/Jun/2025 13:11:03] "GET /predict?data=[[1,2,3,4],[4,3,2,1]] HTTP/1.1" 200 -


In [54]:
json.__version__

'2.0.9'