### QEP Turing Report 2, Heart Disease Prediction using Logistic Regression and QEP Turing APIs

### Lists all the available Projects, specific to the user

In [66]:
import requests
# GET Method
response = requests.get("https://qep-turing-api.azurewebsites.net/projects/list")
print(response.json())

{'projects': ['Demo Project 1', 'Demo Project 2', 'Demo Project 3', 'Demo Project 4', 'Demo Project', 'tigertest', 'Power Generation Prediction', 'Power Generation Prediction using LinReg', 'Power Generation Prediction using LinReg Al', 'Power Generation Prediction using LinReg Al', 'Heart Disease Prediction using Logistic Regression', 'Heart Disease Prediction using Logistic Regression 2']}


### Create  a Project

In [65]:
#Post Method
response = requests.post("https://qep-turing-api.azurewebsites.net/projects/add", 
                         json = {"project_name" : "Heart Disease Prediction using Logistic Regression 2", "created_by" : "Prakash S"})
print(response.json())

{'message': 'Project Created Successfully'}


### Upload the Source file dataset

In [73]:
response = requests.post("http://127.0.0.1:8000/tables/file_upload", files = {"user_file" : open("framingham.csv", "rb")}, 
                        params = {"project_id" : "13", "project_version" : 1, "table_name": "heart_disease", "created_by" : "Prakash S"})
response.json()

{'Message': 'File Uploaded Successfully'}

### List all the Source Tables Available 

In [74]:
#GET Method
response =  requests.get("https://qep-turing-api.azurewebsites.net/tables/list?type=Source&project_id=13&version=1")
print(response.json())

{'Tables': ['heart_disease']}


### List all the columns in the specified source table

In [76]:
# GET Method
response =  requests.get("https://qep-turing-api.azurewebsites.net/tables/source/columns/list?table_name=heart_disease&project_id=13&project_version=1")
print(response.json())

{'columns': ['male', 'age', 'education', 'currentSmoker', 'cigsPerDay', 'BPMeds', 'prevalentStroke', 'prevalentHyp', 'diabetes', 'totChol', 'sysBP', 'diaBP', 'BMI', 'heartRate', 'glucose', 'TenYearCHD']}


### Exploratory Data Analysis

### Schema for EDA

In [85]:
eda_data  = {
  "name": "EDA",
  "task_key": "task1",
  "table_name": "heart_disease",
  "target_column": "TenYearCHD",
  "is_classification": "1",
  "project_id": "13",
  "version": "1",
  "created_by": "4e6ca596-c969-47fd-9876-86b35b9dd917"
}

### Job Creation for Exploratory Data Analysis

In [89]:
response = requests.post("https://qep-turing-api.azurewebsites.net/eda/run" , json  = eda_data)
response.json()

{'message': 'Job Run triggered'}

### List all  the Transforms

In [17]:
#GET Method
response =  requests.get("https://qep-turing-api.azurewebsites.net/transforms/list")
print(response.json())

[{'transform_key': 'T1', 'transform_name': 'Drop null values'}, {'transform_key': 'T2', 'transform_name': 'Drop Duplicates'}, {'transform_key': 'T3', 'transform_name': 'Imputer'}, {'transform_key': 'T4', 'transform_name': 'Standard scaler'}, {'transform_key': 'T6', 'transform_name': 'Logarithmic Transform'}, {'transform_key': 'T7', 'transform_name': 'Reciprocal Transform'}, {'transform_key': 'T8', 'transform_name': 'OneHot Encoder'}, {'transform_key': 'T9', 'transform_name': 'Label Encoder'}, {'transform_key': 'T10', 'transform_name': 'MinMax scaler'}, {'transform_key': 'T11', 'transform_name': 'MaxAbs scaler'}, {'transform_key': 'T12', 'transform_name': 'Box-cox Transform'}, {'transform_key': 'T13', 'transform_name': 'Yeo-Johnson Transform'}, {'transform_key': 'T14', 'transform_name': 'Quantile Transformer'}, {'transform_key': 'T15', 'transform_name': 'Robust scaler'}, {'transform_key': 'T16', 'transform_name': 'Quantile Discretizer'}, {'transform_key': 'T17', 'transform_name': 'Vecto

### Getting the schema  of  a particular Transform

In [20]:
#GET Method
response = requests.get("https://qep-turing-api.azurewebsites.net/transforms/schema/list?transform_key=T4")
print(response.json())

{'_id': {'$oid': '622996fd2ca652af07530deb'}, 'transform_name': 'Standard scaler', 'transform_key': 'T4', 'version': '0.0.1', 'keys': ['numerical columns'], 'parameters': [{'key_name': 'numerical columns', 'key_type': 'List', 'key_description': 'Select the list of numerical columns  which need to be scaled', 'optional': False, 'custom_dropdown': False, 'dropdown_source': 'columns'}]}


### Building the  Schema for Transforms

In [77]:
#POST Method
transformation_data = {
    "project_id": "13",
    "project_version": 1,
    "Tasks": [
        {
            "task_key": "task0",
            "task_type": "Data_Ingestion",
            "table_name": "heart_disease",
            "marker_name": "totChol",
            "depends_on": []
        },
            
        {
            "task_key": "task1",
            "task_type": "Transform",
            "transform_info": "{'transformation_type':'T3','created_by':'Surya','columns':['glucose','totChol', 'cigsPerDay','BMI', 'heartRate'],'strategy':'mean','final_transform':false}",
            "depends_on": [
                {
                    "task_key": "task0"
                }
            ]
        },
        
        {
            "task_key": "task2",
            "task_type": "Transform",
            "transform_info": "{'transformation_type':'T3','created_by':'Surya','columns':['education', 'BPMeds'],'strategy':'median','final_transform':false}",
            "depends_on": [
                {
                    "task_key": "task1"
                }
            ]
        },
                
        {
            "task_key": "task3",
            "task_type": "Transform",
            "transform_info": "{'transformation_type':'T4','created_by':'Surya', 'columns':['cigsPerDay','totChol','sysBP','diaBP','BMI','heartRate','glucose','age'],'final_transform':true}",
            "depends_on": [
                {
                    "task_key": "task2"
                }
            ]
        }
    ],
    "quartz_cron_expression": "0 0/5 * * * ?"
}
response=requests.post("http://127.0.0.1:8000/transforms/run",json = transformation_data)
response

<Response [200]>

### Preview of the Transfomed Table

In [72]:
#GET Method
response=requests.get("https://qep-turing-api.azurewebsites.net/tables/tranformed/preview?project_id=6&version=2&table_name=task3&size=5")
print(response.json())

{'records': [{'male': 1, 'age': 39, 'education': 4, 'currentSmoker': 0, 'cigsPerDay': 0, 'BPMeds': 0, 'prevalentStroke': 0, 'prevalentHyp': 0, 'diabetes': 0, 'totChol': 195, 'sysBP': 106.0, 'diaBP': 70.0, 'BMI': 26.97, 'heartRate': 80, 'glucose': 77, 'TenYearCHD': 0, 'features': [0.0, 4.399172211475177, 4.809852761538266, 5.87699470226495, 6.624985099391438, 6.652704518095031, 3.3715286088257965, 4.549611806205418]}, {'male': 0, 'age': 46, 'education': 2, 'currentSmoker': 0, 'cigsPerDay': 0, 'BPMeds': 0, 'prevalentStroke': 0, 'prevalentHyp': 0, 'diabetes': 0, 'totChol': 250, 'sysBP': 121.0, 'diaBP': 81.0, 'BMI': 28.73, 'heartRate': 95, 'glucose': 76, 'TenYearCHD': 0, 'features': [0.0, 5.639964373686125, 5.490492303265379, 6.8005224411922995, 7.057316347998369, 7.900086615237849, 3.32774252299689, 5.3662087970628]}, {'male': 1, 'age': 48, 'education': 1, 'currentSmoker': 1, 'cigsPerDay': 20, 'BPMeds': 0, 'prevalentStroke': 0, 'prevalentHyp': 0, 'diabetes': 0, 'totChol': 245, 'sysBP': 12

### Model Training Module

### List of all the Transformed Tables for the given Project Id and Project Version

In [32]:
response =  requests.get("https://qep-turing-api.azurewebsites.net/tables/list?type=Transformed&project_id=6&version=2")
response.json()

{'Tables': ['task3']}

### List all the Columns of the Specific Transformed Table

In [34]:
response  = requests.get("https://qep-turing-api.azurewebsites.net/tables/transformed/columns/list?table_name=task3&project_id=6&version=2")
response.json()

{'columns': ['male',
  'age',
  'education',
  'currentSmoker',
  'cigsPerDay',
  'BPMeds',
  'prevalentStroke',
  'prevalentHyp',
  'diabetes',
  'totChol',
  'sysBP',
  'diaBP',
  'BMI',
  'heartRate',
  'glucose',
  'TenYearCHD',
  'features']}

### List of all the Models

In [57]:
response=requests.get("https://qep-turing-api.azurewebsites.net/models/list")
print(response.json())

[{'model_key': 'M1', 'model': 'Logistic Regression'}, {'model_key': 'M2', 'model': 'Random Forest'}, {'model_key': 'M3', 'model': 'Linear Regression'}, {'model_key': 'M5', 'model': 'AutoML Regression'}, {'model_key': 'M6', 'model': 'AutoML Classification'}, {'model_key': 'M00', 'model': 'Linear Regression Demo'}, {'model_key': 'M00', 'model': 'Linear Regression Demo'}, {'model_key': 'M00', 'model': 'Linear Regression Demo'}]


### Get a Particular Model


In [58]:
response=requests.get("https://qep-turing-api.azurewebsites.net/models/schema/list?model_key=M3")
print(response.json())

{'_id': {'$oid': '6228b1a3af62bde984049d82'}, 'model': 'Linear Regression', 'model_key': 'M3', 'version': '0.1.0', 'keys': ['source', 'target column', 'Train split ratio', 'maxIter', 'regParam', 'elasticNetParam', 'tol', 'fitIntercept', 'standardization', 'solver', 'weightCol', 'aggregationDepth', 'loss', 'epsilon', 'maxBlockSizeInMB'], 'parameters': [{'key_name': 'source', 'key_type': 'string', 'key_description': 'Select the name of the table that needs to passed for the model train', 'optional': False, 'custom_dropdown': False, 'dropdown_source': 'tables'}, {'key_name': 'target column', 'key_type': 'string', 'key_description': 'Select the name of the target column that needs to passed for the model train', 'optional': False, 'custom_dropdown': False, 'dropdown_source': 'columns'}, {'key_name': 'Train split ratio', 'key_type': 'float', 'key_description': 'Enter split ratio of the dataset for model train', 'optional': False}, {'key_name': 'maxIter', 'key_type': 'integer', 'key_descript

### Schema for model Training

In [78]:
training_data = {
  "project_id": "13",
  "project_version": 1,
  "model_key": "M1",
  "model_name": "Logistic_Regression_for_Heart_Disease_Prediction",
  "feature_column": "features",
  "target_column": "TenYearCHD",
  "train_split_ratio": 0.8,
  "transformed_table_name": "task3",
  "hyperparameters": {}
}


### Model Training


In [79]:
response=requests.post("https://qep-turing-api.azurewebsites.net/model_train/run",json = training_data)
response

<Response [200]>

### Metric of the Model Trained 

In [95]:
#GET Method
response = requests.get("https://qep-turing-api.azurewebsites.net/mlflow/run/detail_list?project_id=13&project_version=1")
response.json()

{'run': {'info': {'run_uuid': '420e84cc13ab490097c073e28100656b',
   'experiment_id': '3178308932673324',
   'status': 'FINISHED',
   'start_time': 1647511730101,
   'end_time': 1647511824340,
   'artifact_uri': 'dbfs:/databricks/mlflow-tracking/3178308932673324/420e84cc13ab490097c073e28100656b/artifacts',
   'lifecycle_stage': 'active',
   'run_id': '420e84cc13ab490097c073e28100656b'},
  'data': {'metrics': [{'key': 'Accuracy',
     'value': 0.8632019115890084,
     'timestamp': 1647511816921,
     'step': 0},
    {'key': 'F1',
     'value': 0.8098983258579266,
     'timestamp': 1647511817303,
     'step': 0},
    {'key': 'Test Error',
     'value': 0.13679808841099161,
     'timestamp': 1647511817090,
     'step': 0},
    {'key': 'Weighted Precision',
     'value': 0.8202514754764203,
     'timestamp': 1647511817442,
     'step': 0}],
   'tags': [{'key': 'mlflow.databricks.cluster.id',
     'value': '0118-060153-orupo48j'},
    {'key': 'mlflow.databricks.cluster.info',
     'value': 