![giskard_logo.png](https://raw.githubusercontent.com/Giskard-AI/giskard/main/readme/Logo_full_darkgreen.png)


# About Giskard

Open-Source CI/CD platform for ML teams. Deliver ML products, better & faster. 

*   Collaborate faster with feedback from business stakeholders.
*   Deploy automated tests to eliminate regressions, errors & biases.

🏡 [Website](https://giskard.ai/)

📗 [Documentation](https://docs.giskard.ai/)

## Installing `giskard`

In [None]:
!pip install giskard

## Connect the external worker in daemon mode

In [None]:
# Launch the Giskard worker from the shell; `-d` is the daemon-mode flag
# referred to in the section heading.
!giskard worker start -d

## Curious to know how it works?
#### Let's start by creating our awesome lead scoring classification model. 🎉🎉🎉

In [None]:
import pandas as pd
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import model_selection
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score


In [None]:
# Download the bank lead-scoring CSV from the Giskard examples repository
# and load it into a DataFrame.
DATA_URL = 'https://raw.githubusercontent.com/Giskard-AI/examples/main/datasets/bank_lead_scoring_algorithm_updated.csv'
data = pd.read_csv(DATA_URL)

In [None]:
# Preview the first rows of the loaded dataset.
data.head()

In [None]:
# Map every declared dataset column to its type: 'category', 'numeric' or 'text'.
# Insertion order is kept because the feature lists are later derived from it.
# NOTE(review): the target column 'will_subscribe' is not declared here —
# confirm whether Giskard expects it in this mapping.
column_types = dict(
    age='numeric',
    job='category',
    marital_status='category',
    education='category',
    defaulted_bank_credit='category',
    avg_year_acc_balance='numeric',
    housing_loan='category',
    personal_loan='category',
    contacted_via='category',
    last_contact_dayofmonth='numeric',
    last_contact_month='category',
    last_call_duration_secs='numeric',
    num_contact_dur_campaign='numeric',
    dayscontact_since_lstcampaign='numeric',
    num_contact_bfr_campaign='numeric',
    outcome='category',
)

In [None]:
# feature_types declares the features the model is trained on: every declared
# column except the target. The target of this notebook is 'will_subscribe';
# the previous filter compared against 'default', a column that does not exist
# in this dataset, so it never excluded anything.
feature_types = {name: kind for name, kind in column_types.items() if name != 'will_subscribe'}

# Numeric columns: fill missing values with the column mean, then standardise.
numeric_features = [name for name, kind in feature_types.items() if kind == "numeric"]
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(missing_values=np.nan, strategy='mean')),
    ('scaler', StandardScaler()),
])

# Categorical columns: fill missing values with the most frequent value, then
# one-hot encode. Categories not seen during fit are ignored at transform time.
# NOTE(review): `sparse` was renamed to `sparse_output` in scikit-learn 1.2 —
# confirm which scikit-learn version this notebook is meant to run against.
categorical_features = [name for name, kind in feature_types.items() if kind == "category"]
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(missing_values=np.nan, strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore', sparse=False)),
])

# Send each group of columns through its own preprocessing pipeline.
preprocessor = ColumnTransformer(transformers=[
    ('num', numeric_transformer, numeric_features),
    ('cat', categorical_transformer, categorical_features),
])

# Complete model: preprocessing followed by a logistic regression classifier.
clf = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', LogisticRegression(max_iter=1000)),
])

In [None]:
# Separate the features from the target, then hold out 20% of the rows for
# testing. The split is stratified on the target and uses a fixed seed.
X = data.drop(columns="will_subscribe")
y = data['will_subscribe']
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=30,
    stratify=y,
)

In [None]:
# Train the pipeline on the training split, then report its score on the
# held-out test split.
clf.fit(X_train, y_train)
test_score = clf.score(X_test, y_test)
print("model score: %.3f" % test_score)

In [None]:
from sklearn.metrics import confusion_matrix

# Predict on the held-out split and report the evaluation metrics.
y_pred = clf.predict(X_test)

# Keep the result under its own name: assigning it to `confusion_matrix` would
# rebind the imported function to an array and break any later call to it.
conf_matrix = confusion_matrix(y_test, y_pred)
print(conf_matrix)

# Macro-averaged F1: the unweighted mean of the per-class F1 scores.
f1 = f1_score(y_true=y_test, y_pred=y_pred, average='macro')
print('F1 Score: ', f1)

accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", accuracy)

In [None]:
# Re-attach the target column to each split so that every frame holds both the
# features and the ground truth.
train_data = pd.concat([X_train, y_train], axis="columns")
test_data = pd.concat([X_test, y_test], axis="columns")

In [None]:
from giskard import GiskardClient

# Address of the Giskard server to upload to.
url = "http://localhost:19000"  # if Giskard is installed locally (for installation, see: https://docs.giskard.ai/start/guides/installation)
#url = "http://app.giskard.ai" # If you want to upload on giskard URL

# Replace the placeholder with your own API token; you can generate it in the
# Admin tab of the Giskard application.
token = "YOUR GENERATED TOKEN"

client = GiskardClient(url, token)

# Signature: client.create_project("project_key", "PROJECT_NAME", "DESCRIPTION")
# Choose the arguments you want, but "project_key" should be unique and in lower case.
lead_scoring = client.create_project("lead_scoring", "Lead Scoring", "Project to predict if user will subscribe to the bank")

# If you've already created a project with the key "lead_scoring", use this
# instead (same variable name, so the upload cell keeps working):
# lead_scoring = client.get_project("lead_scoring")


In [None]:
# Upload the fitted model together with the test dataset to the Giskard project.
lead_scoring.upload_model_and_df(
    # Python function which takes a pandas dataframe as input and returns
    # probabilities (classification) or predictions (regression).
    prediction_function=clf.predict_proba,
    # "classification" for a classification model OR "regression" for a regression model.
    model_type='classification',
    # The dataset you want to use to inspect your model.
    df=test_data,
    # Dictionary with column names of df as keys and their types
    # (category, numeric, text) as values.
    column_types=column_types,
    # The column name in df corresponding to the actual target variable (ground truth).
    target='will_subscribe',
    # List of the feature names of prediction_function.
    feature_names=list(feature_types),
    # List of the classification labels of your prediction.
    classification_labels=clf.classes_,
    # Name of the model.
    model_name='logistic_regression_v1',
    # Name of the dataset.
    dataset_name='test_data',
)

### Happy Exploration! 🧑‍🚀