# 1. Dataset schema and storage

In [1]:
# Create the directory datasets
!mkdir datasets
import pandas as pd

In [2]:
# Download the zipped file from the google drive and store it in airline.zip
!gdown 1PKSwiZuBcGzSFE3ruHbbTlGCpJ0ruP9d -O datasets/airline.zip

Downloading...
From: https://drive.google.com/uc?id=1PKSwiZuBcGzSFE3ruHbbTlGCpJ0ruP9d
To: /content/datasets/airline.zip
  0% 0.00/2.84M [00:00<?, ?B/s]100% 2.84M/2.84M [00:00<00:00, 51.6MB/s]


In [3]:
# Check the contents of datasets
!ls -al datasets/

total 2784
drwxr-xr-x 2 root root    4096 Dec  9 06:10 .
drwxr-xr-x 1 root root    4096 Dec  9 06:10 ..
-rw-r--r-- 1 root root 2841945 Dec  6 12:40 airline.zip


In [4]:
# Check the information about the file's type
!file datasets/airline.zip

datasets/airline.zip: Zip archive data, at least v4.5 to extract, compression method=deflate


In [5]:
# Unzip the airline.zip and retrive the train.csv and test.csv
!unzip datasets/airline.zip -d datasets/

Archive:  datasets/airline.zip
  inflating: datasets/test.csv       
  inflating: datasets/train.csv      


In [6]:
# Check the contents of datasets
!ls /content/datasets

airline.zip  test.csv  train.csv


In [7]:
# Read the CSV file into train_df
train_df = pd.read_csv("/content/datasets/train.csv")
train_df

Unnamed: 0.1,Unnamed: 0,id,Gender,Customer Type,Age,Type of Travel,Class,Flight Distance,Inflight wifi service,Departure/Arrival time convenient,...,Inflight entertainment,On-board service,Leg room service,Baggage handling,Checkin service,Inflight service,Cleanliness,Departure Delay in Minutes,Arrival Delay in Minutes,satisfaction
0,0,70172,Male,Loyal Customer,13,Personal Travel,Eco Plus,460,3,4,...,5,4,3,4,4,5,5,25,18.0,neutral or dissatisfied
1,1,5047,Male,disloyal Customer,25,Business travel,Business,235,3,2,...,1,1,5,3,1,4,1,1,6.0,neutral or dissatisfied
2,2,110028,Female,Loyal Customer,26,Business travel,Business,1142,2,2,...,5,4,3,4,4,4,5,0,0.0,satisfied
3,3,24026,Female,Loyal Customer,25,Business travel,Business,562,2,5,...,2,2,5,3,1,4,2,11,9.0,neutral or dissatisfied
4,4,119299,Male,Loyal Customer,61,Business travel,Business,214,3,3,...,3,3,4,4,3,3,3,0,0.0,satisfied
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
103899,103899,94171,Female,disloyal Customer,23,Business travel,Eco,192,2,1,...,2,3,1,4,2,3,2,3,0.0,neutral or dissatisfied
103900,103900,73097,Male,Loyal Customer,49,Business travel,Business,2347,4,4,...,5,5,5,5,5,5,4,0,0.0,satisfied
103901,103901,68825,Male,disloyal Customer,30,Business travel,Business,1995,1,1,...,4,3,2,4,5,5,4,7,14.0,neutral or dissatisfied
103902,103902,54173,Female,disloyal Customer,22,Business travel,Eco,1000,1,1,...,1,4,5,1,5,4,1,0,0.0,neutral or dissatisfied


In [8]:
# Check the data types in the file
train_df.dtypes

Unnamed: 0,0
Unnamed: 0,int64
id,int64
Gender,object
Customer Type,object
Age,int64
Type of Travel,object
Class,object
Flight Distance,int64
Inflight wifi service,int64
Departure/Arrival time convenient,int64


In [9]:
# Remove rows with missing values in the specific column - Arrival Delay in Minutes.
train_df.dropna(subset=['Arrival Delay in Minutes'], inplace=True)

train_df

Unnamed: 0.1,Unnamed: 0,id,Gender,Customer Type,Age,Type of Travel,Class,Flight Distance,Inflight wifi service,Departure/Arrival time convenient,...,Inflight entertainment,On-board service,Leg room service,Baggage handling,Checkin service,Inflight service,Cleanliness,Departure Delay in Minutes,Arrival Delay in Minutes,satisfaction
0,0,70172,Male,Loyal Customer,13,Personal Travel,Eco Plus,460,3,4,...,5,4,3,4,4,5,5,25,18.0,neutral or dissatisfied
1,1,5047,Male,disloyal Customer,25,Business travel,Business,235,3,2,...,1,1,5,3,1,4,1,1,6.0,neutral or dissatisfied
2,2,110028,Female,Loyal Customer,26,Business travel,Business,1142,2,2,...,5,4,3,4,4,4,5,0,0.0,satisfied
3,3,24026,Female,Loyal Customer,25,Business travel,Business,562,2,5,...,2,2,5,3,1,4,2,11,9.0,neutral or dissatisfied
4,4,119299,Male,Loyal Customer,61,Business travel,Business,214,3,3,...,3,3,4,4,3,3,3,0,0.0,satisfied
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
103899,103899,94171,Female,disloyal Customer,23,Business travel,Eco,192,2,1,...,2,3,1,4,2,3,2,3,0.0,neutral or dissatisfied
103900,103900,73097,Male,Loyal Customer,49,Business travel,Business,2347,4,4,...,5,5,5,5,5,5,4,0,0.0,satisfied
103901,103901,68825,Male,disloyal Customer,30,Business travel,Business,1995,1,1,...,4,3,2,4,5,5,4,7,14.0,neutral or dissatisfied
103902,103902,54173,Female,disloyal Customer,22,Business travel,Eco,1000,1,1,...,1,4,5,1,5,4,1,0,0.0,neutral or dissatisfied


In [10]:
# Check the shape of the dataframe
train_df.shape

(103594, 25)

### Optimizing Data Types

In [11]:
dtypes = {
    "Unnamed: 0": "int16", # This is the serial number and takes upto 6 digits
    "id": "int16",  # This is the id and takes upto 6 digits
    "Gender": "category", # This is the gender, and takes male or female
    "Customer Type": "category", # This takes loyal customer/disloyal customer
    "Age": "int8", # This takes age values ranging from 7 to 85
    "Type of Travel": "category", # Personal Travel/Business Travel
    "Class": "category", # Eco/Eco Plus/Business
    "Flight Distance": "int16", # Distance of the flight max 4963
    "Inflight wifi service": "category", # Rating of the service 0 to 5
    "Departure/Arrival time convenient": "category", # Rating of the service 0 to 5
    "Ease of Online booking": "category", # Rating of the service 0 to 5
    "Gate location": "category", # Rating of the service 0 to 5
    "Food and drink": "category", # Rating of the service 0 to 5
    "Online boarding": "category",  # Rating of the service 0 to 5
    "Seat comfort": "category", # Rating of the service 0 to 5
    "Inflight entertainment": "category", # Rating of the service 0 to 5
    "On-board service": "category", # Rating of the service 0 to 5
    "Leg room service": "category", # Rating of the service 0 to 5
    "Baggage handling": "category", # Rating of the service 0 to 5
    "Checkin service": "category", # Rating of the service 0 to 5
    "Inflight service": "category", # Rating of the service 0 to 5
    "Cleanliness": "category", # Rating of the service 0 to 5
    "Departure Delay in Minutes": "int16", # Duration of delay in departure
    "Arrival Delay in Minutes": "int16", # Duration of delay in arrival
    "satisfaction" : "category" # Satisfaction measured as neutral or dissatisfied/satisfied
}

In [12]:
# Update the data types of the dataset to optimize them based on their contents
train_new_df = pd.read_csv("/content/datasets/train.csv") #, dtype = dtypes)

train_new_df.dropna(subset=['Arrival Delay in Minutes'], inplace=True)

train_new_df = train_new_df.astype(dtypes)

train_new_df.dtypes

Unnamed: 0,0
Unnamed: 0,int16
id,int16
Gender,category
Customer Type,category
Age,int8
Type of Travel,category
Class,category
Flight Distance,int16
Inflight wifi service,category
Departure/Arrival time convenient,category


### Creating Parquet format

In [13]:
# Convert the dataframe to a parquet format and store it in datasets
train_new_df.to_parquet("/content/datasets/train.parquet")

In [14]:
# Check if the parquet format is stored as expected
!ls -alh datasets/

total 20M
drwxr-xr-x 2 root root 4.0K Dec  9 06:10 .
drwxr-xr-x 1 root root 4.0K Dec  9 06:10 ..
-rw-r--r-- 1 root root 2.8M Dec  6 12:40 airline.zip
-rw-r--r-- 1 root root 2.9M Feb 20  2020 test.csv
-rw-r--r-- 1 root root  12M Feb 20  2020 train.csv
-rw-r--r-- 1 root root 2.6M Dec  9 06:10 train.parquet


# 2. Profiling the dataset

In [15]:
# Install the ydata_profiling library
!pip install ydata_profiling

Collecting ydata_profiling
  Downloading ydata_profiling-4.12.1-py2.py3-none-any.whl.metadata (20 kB)
Collecting visions<0.7.7,>=0.7.5 (from visions[type_image_path]<0.7.7,>=0.7.5->ydata_profiling)
  Downloading visions-0.7.6-py3-none-any.whl.metadata (11 kB)
Collecting htmlmin==0.1.12 (from ydata_profiling)
  Downloading htmlmin-0.1.12.tar.gz (19 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting phik<0.13,>=0.11.1 (from ydata_profiling)
  Downloading phik-0.12.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.6 kB)
Collecting multimethod<2,>=1.4 (from ydata_profiling)
  Downloading multimethod-1.12-py3-none-any.whl.metadata (9.6 kB)
Collecting imagehash==4.3.1 (from ydata_profiling)
  Downloading ImageHash-4.3.1-py2.py3-none-any.whl.metadata (8.0 kB)
Collecting dacite>=1.8 (from ydata_profiling)
  Downloading dacite-1.8.1-py3-none-any.whl.metadata (15 kB)
Collecting PyWavelets (from imagehash==4.3.1->ydata_profiling)
  Downloading pywavelets-1.

In [16]:
# Import Profile Report
from ydata_profiling import ProfileReport

In [17]:
profile = ProfileReport(train_new_df, title="Airline Experience Profiling Report")


In [18]:
profile.to_notebook_iframe()

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]

# 3. Train - Test Split

## Feature set selection

In [30]:
train_new_df.columns

Index(['Unnamed: 0', 'id', 'Gender', 'Customer Type', 'Age', 'Type of Travel',
       'Class', 'Flight Distance', 'Inflight wifi service',
       'Departure/Arrival time convenient', 'Ease of Online booking',
       'Gate location', 'Food and drink', 'Online boarding', 'Seat comfort',
       'Inflight entertainment', 'On-board service', 'Leg room service',
       'Baggage handling', 'Checkin service', 'Inflight service',
       'Cleanliness', 'Departure Delay in Minutes', 'Arrival Delay in Minutes',
       'satisfaction'],
      dtype='object')

In [37]:
x_features = ['Gender', 'Customer Type', 'Age', 'Type of Travel',
       'Class', 'Flight Distance', 'Inflight wifi service',
       'Departure/Arrival time convenient', 'Ease of Online booking',
       'Gate location', 'Food and drink', 'Online boarding', 'Seat comfort',
       'Inflight entertainment', 'On-board service', 'Leg room service',
       'Baggage handling', 'Checkin service', 'Inflight service',
       'Cleanliness', 'Departure Delay in Minutes', 'Arrival Delay in Minutes']

cat_vars = ['Gender', 'Customer Type', 'Type of Travel',
       'Class', 'Inflight wifi service',
       'Departure/Arrival time convenient', 'Ease of Online booking',
       'Gate location', 'Food and drink', 'Online boarding', 'Seat comfort',
       'Inflight entertainment', 'On-board service', 'Leg room service',
       'Baggage handling', 'Checkin service', 'Inflight service',
       'Cleanliness']

num_vars = list(set(x_features) - set(cat_vars))

num_vars

['Arrival Delay in Minutes',
 'Departure Delay in Minutes',
 'Flight Distance',
 'Age']

In [21]:
train_new_df[x_features].info()

<class 'pandas.core.frame.DataFrame'>
Index: 103594 entries, 0 to 103903
Data columns (total 22 columns):
 #   Column                             Non-Null Count   Dtype   
---  ------                             --------------   -----   
 0   Gender                             103594 non-null  category
 1   Customer Type                      103594 non-null  category
 2   Age                                103594 non-null  int8    
 3   Type of Travel                     103594 non-null  category
 4   Class                              103594 non-null  category
 5   Flight Distance                    103594 non-null  int16   
 6   Inflight wifi service              103594 non-null  category
 7   Departure/Arrival time convenient  103594 non-null  category
 8   Ease of Online booking             103594 non-null  category
 9   Gate location                      103594 non-null  category
 10  Food and drink                     103594 non-null  category
 11  Online boarding                

### Data Transformation

1. One Hot Encoding for Categorical Data
2. Scaling for Numeric Data

In [38]:
x = train_new_df[x_features]
y = train_new_df['satisfaction']

#train_new_df['Arrival Delay in Minutes']

### Data Splitting

In [23]:
from sklearn.model_selection import train_test_split

In [39]:
x_train, x_test, y_train, y_test = train_test_split(x,
                                                    y,
                                                    train_size = 0.8,
                                                    random_state = 60)

In [28]:
x_train.shape

(82875, 22)

In [29]:
x_test.shape

(20719, 22)

### Encoding Categorical Variables

In [40]:
from sklearn.preprocessing import OneHotEncoder

In [41]:
ohe_encoder = OneHotEncoder(handle_unknown='ignore')

### Scaling Numerical Values

In [42]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

### Creating Pipelines

In [43]:
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

In [44]:
non_imputed_num_transformer = Pipeline( steps = [('scaler', scaler)])
cat_transformer = Pipeline( steps = [('ohencoder', ohe_encoder)])

In [45]:
preprocessor = ColumnTransformer(
    transformers=[
        ('num_not_imputed', non_imputed_num_transformer, num_vars),
        ('catvars', cat_transformer, cat_vars)])

### KNN (K Nearest Neighbor)

In [46]:
from sklearn.neighbors import KNeighborsRegressor

In [47]:
#knn = KNeighborsRegressor(n_neighbors=20)
knn = KNeighborsRegressor(n_neighbors=20, weights='distance')

In [48]:
knn_v1 = Pipeline(steps=[('preprocessor', preprocessor),
                          ('knn', knn)])

In [60]:
# Assuming 'satisfied' is represented by 1 and other values by 0:
y_train = y_train.map({'satisfied': 1, 'neutral or dissatisfied': 0}).astype(int)
y_test = y_test.map({'satisfied': 1, 'neutral or dissatisfied': 0}).astype(int)


In [61]:
knn_v1.fit(x_train, y_train)

In [62]:
from sklearn import set_config
set_config(display='diagram')

In [63]:
knn_v1

In [65]:
## Predict on test set
y_pred = knn_v1.predict(x_test)

###K Fold Cross Validation

In [66]:
from sklearn.model_selection import cross_val_score

In [67]:
scores = cross_val_score( knn_v1,
                          x_train,
                          y_train,
                          cv = 10,
                          scoring = 'r2')

In [69]:
scores

array([0.78695115, 0.80591409, 0.80031833, 0.81603674, 0.80455276,
       0.81404578, 0.80660739, 0.81040458, 0.81795743, 0.80381763])

In [70]:
scores.mean()

0.8066605894164791

In [71]:
scores.std()

0.008519749230817922

In [57]:
!pip install wandb



In [80]:
import wandb
import os
from sklearn.linear_model import LinearRegression
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split, GridSearchCV

In [73]:
os.environ["WANDB_API_KEY"] = "2dd7cb2aa4dead52f2776ea6e5f391577474bbdb"

### Linear Regression

In [79]:
linear_reg = LinearRegression()

linear_model = Pipeline(steps=[('preprocessor', preprocessor),
                               ('linear_model', linear_reg)])
## Pipeline for the applying imputation and then scaling

linear_model.fit(x_train, y_train)

wandb.init(project='mlops_airlinesatisfaction', config=None, tags = ['Linear Model', 'baseline', 'OHE Encoding'])
wandb.run.name = "LinearModel"
rmse = np.sqrt(mean_squared_error(y_test, linear_model.predict(x_test)))
r2 = linear_model.score(x_test, y_test)

wandb.log( {"rmse" : rmse,
            "r2": r2} )

wandb.Artifact("LinearModel",
               type = 'model',
               description = None)

wandb.save()
wandb.finish()



0,1
r2,▁
rmse,▁

0,1
r2,0.71327
rmse,0.26567


### Decision Tree

In [81]:
params = {"max_depth": 10}

dtree = DecisionTreeRegressor(**params)

dtree_model = Pipeline(steps=[('preprocessor', preprocessor),
                               ('dt_model', dtree)])


dtree_model.fit(x_train, y_train)

wandb.init(project='mlops_airlinesatisfaction', config=params, tags = ['Decision Tree',
                                                           'OHE Encoding'])
wandb.run.name = "DecisionTree"
rmse = np.sqrt(mean_squared_error(y_test, dtree_model.predict(x_test)))
r2 = dtree_model.score(x_test, y_test)

wandb.log( {"rmse" : rmse,
            "r2": r2} )

wandb.Artifact("DecisionTree",
               type = 'model',
               description = params)

wandb.save()
wandb.finish()

0,1
r2,▁
rmse,▁

0,1
r2,0.82761
rmse,0.20599


### Manual Grid Search

In [82]:
from sklearn.model_selection import GridSearchCV

In [83]:
params = { "dt_model__max_depth" : range(5, 10)}

In [84]:
dtree = DecisionTreeRegressor()

dtree_model = Pipeline(steps=[('preprocessor', preprocessor),
                               ('dt_model', dtree)])

In [85]:
dt_grid = GridSearchCV(dtree_model,
                       param_grid = params,
                       cv = 10,
                       scoring = 'r2')

In [86]:
dt_grid.fit(x_train, y_train)

In [87]:
dt_grid.best_params_

{'dt_model__max_depth': 9}

In [88]:
dt_grid.best_score_

0.8113320562415929

In [89]:
pd.DataFrame(dt_grid.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_dt_model__max_depth,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,split5_test_score,split6_test_score,split7_test_score,split8_test_score,split9_test_score,mean_test_score,std_test_score,rank_test_score
0,0.794698,0.134831,0.041252,0.009587,5,{'dt_model__max_depth': 5},0.698835,0.716765,0.707263,0.714198,0.705122,0.704588,0.691453,0.714273,0.734643,0.701108,0.708825,0.011329,5
1,0.962167,0.157727,0.041277,0.008147,6,{'dt_model__max_depth': 6},0.724705,0.7395,0.731142,0.738236,0.721778,0.727152,0.71292,0.742015,0.759255,0.722883,0.731959,0.012519,4
2,1.0948,0.168633,0.038753,0.008225,7,{'dt_model__max_depth': 7},0.761463,0.771061,0.764996,0.773007,0.760977,0.760194,0.751261,0.778477,0.782903,0.759833,0.766417,0.009221,3
3,1.394452,0.216014,0.03816,0.007631,8,{'dt_model__max_depth': 8},0.789837,0.800718,0.794402,0.801235,0.788903,0.79455,0.788707,0.803876,0.811756,0.792663,0.796665,0.007161,2
4,1.609878,0.189295,0.040565,0.007401,9,{'dt_model__max_depth': 9},0.800693,0.812776,0.806843,0.818034,0.809251,0.810879,0.809819,0.822618,0.822091,0.800317,0.811332,0.007427,1


### Using Sweep Functions

In [90]:
def train_decision_tree(config=None):
    # Initialize WandB
    with wandb.init(config=config):
        config = wandb.config

        dtree = DecisionTreeRegressor(max_depth=config.max_depth)

        dtree_model = Pipeline(steps=[('preprocessor', preprocessor),
                                      ('dt_model', dtree)])
        dtree_model.fit(x_train, y_train)

        # Evaluate the model
        rmse = np.sqrt(mean_squared_error(y_test, dtree_model.predict(x_test)))
        r2 = dtree_model.score(x_test, y_test)

        # Log metrics to WandB
        wandb.log( {"rmse" : rmse,
                    "r2": r2,
                    "max_depth": config.max_depth} )

In [91]:
sweep_config = {
    "method": "grid",
    "metric": {"name": "r2", "goal": "maximize"},
    "parameters": {
        "max_depth": {
            "values": [4, 6, 8, 12]  # Depths to evaluate
        },
    },
}

In [92]:
sweep_id = wandb.sweep(sweep_config, project="mlops_airlinesatisfaction")

Create sweep with ID: 0a668sr4
Sweep URL: https://wandb.ai/rachna-devraj-indian-school-of-business/mlops_airlinesatisfaction/sweeps/0a668sr4


In [93]:
wandb.agent(sweep_id,
            function=train_decision_tree)  # Run all experiments

[34m[1mwandb[0m: Agent Starting Run: pzjzhowq with config:
[34m[1mwandb[0m: 	max_depth: 4


0,1
max_depth,▁
r2,▁
rmse,▁

0,1
max_depth,4.0
r2,0.6607
rmse,0.289


[34m[1mwandb[0m: Agent Starting Run: vclgh8su with config:
[34m[1mwandb[0m: 	max_depth: 6


0,1
max_depth,▁
r2,▁
rmse,▁

0,1
max_depth,6.0
r2,0.73061
rmse,0.25751


[34m[1mwandb[0m: Agent Starting Run: t654otw9 with config:
[34m[1mwandb[0m: 	max_depth: 8


0,1
max_depth,▁
r2,▁
rmse,▁

0,1
max_depth,8.0
r2,0.79547
rmse,0.22437


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: j5mt6jyv with config:
[34m[1mwandb[0m: 	max_depth: 12


0,1
max_depth,▁
r2,▁
rmse,▁

0,1
max_depth,12.0
r2,0.84038
rmse,0.19822


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Sweep Agent: Exiting.


### Get Experiment Details

In [95]:
api = wandb.Api()

all_runs = api.runs("rachna-devraj-indian-school-of-business/mlops_airlinesatisfaction", order="+summary_metrics.rmse")

for run in all_runs:
  print(f"Model Name: {run.name} and R2 {run.summary.get('r2')}")
  print(run.config)

Model Name: breezy-sweep-4 and R2 0.8403806164770024
{'max_depth': 12}
Model Name: DecisionTree and R2 0.82761350115201
{'max_depth': 10}
Model Name: winter-sweep-3 and R2 0.7954747104611594
{'max_depth': 8}
Model Name: ethereal-sweep-2 and R2 0.7306127182594144
{'max_depth': 6}
Model Name: LinearModel and R2 0.7132693542586106
{}
Model Name: hardy-sweep-1 and R2 0.6606996787206357
{'max_depth': 4}
Model Name: LinearModel and R2 None
{}


### Storing the model into a file

In [96]:
from joblib import dump

MODEL_DIR = "./airlinemodel"

os.mkdir(MODEL_DIR)
dump(linear_model, MODEL_DIR + "/" + 'airline.pkl')

['./airlinemodel/airline.pkl']

###Logging the model artifact in the tracking tools (W & B)

In [97]:
wandb.init(project='mlops_airlinesatisfaction',
           config=None,
           tags = ['Final Model'])
wandb.run.name = "FinalModel"



In [98]:
model_artifact = wandb.Artifact("Linear_Model_AirlineSatisfaction",
                                type = 'model',
                                description = 'Linear Model for Airline Satisfaction prediction')

In [99]:
model_artifact.add_dir(MODEL_DIR)

[34m[1mwandb[0m: Adding directory to artifact (./airlinemodel)... Done. 0.0s


In [100]:
wandb.run.log_artifact(model_artifact)

<Artifact Linear_Model_AirlineSatisfaction>

In [101]:
wandb.save()
wandb.finish()

In [102]:
import sklearn
sklearn.__version__

'1.5.2'