<a href="https://colab.research.google.com/github/tanviredu/PowerBI/blob/main/MLOPS_GETTING_STARTED_AUTO_MPG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

CHUNK_SIZE = 40960
DATA_SOURCE_MAPPING = 'autompg-dataset:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F1489%2F2704%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240815%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240815T103857Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D331d0110efc7f569caba33caef397b48b85f2d7b0484b9f04dda12f4e077fc98ea4f5b839f044a7e432e2229cd7e6ab139067f0db5623d7a57a235310630e0ce95a2c333646c731c16aa7085815b62fb4cd91fea7e0fa7c4cf7057ad5a3d888bdd7ae1798ac4864b7c590582d60a5f39de1bb909b0bca54562ec1ada72c6470a6f7a003850bdc43ecc84c714cadc0c2a0f223e3dc7e9541fdb9ff8d4e5c990ed9522a10291db1b094b26bf52f54dac58ac6318e46cc593e59a6630ef3c76d3a28d6ad510e580df939e8122045c40adc4a16af83e3b9c453934770f3fcf8b77245b56739f8fd2ec8951b5c1eb458203429bda30696e9c084dcaab5741fcdad127'

KAGGLE_INPUT_PATH='/kaggle/input'
KAGGLE_WORKING_PATH='/kaggle/working'
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

try:
  os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
  pass
try:
  os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
  pass

for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    directory, download_url_encoded = data_source_mapping.split(':')
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            total_length = fileres.headers['content-length']
            print(f'Downloading {directory}, {total_length} bytes compressed')
            dl = 0
            data = fileres.read(CHUNK_SIZE)
            while len(data) > 0:
                dl += len(data)
                tfile.write(data)
                done = int(50 * dl / int(total_length))
                sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                sys.stdout.flush()
                data = fileres.read(CHUNK_SIZE)
            if filename.endswith('.zip'):
              with ZipFile(tfile) as zfile:
                zfile.extractall(destination_path)
            else:
              with tarfile.open(tfile.name) as tarfile:
                tarfile.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError as e:
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
        continue
    except OSError as e:
        print(f'Failed to load {download_url} to path {destination_path}')
        continue

print('Data source import complete.')


Downloading autompg-dataset, 6461 bytes compressed
Downloaded and uncompressed: autompg-dataset
Data source import complete.


In [2]:
import pandas as pd
import matplotlib.pyplot as plt

In [3]:
df = pd.read_csv("/kaggle/input/autompg-dataset/auto-mpg.csv")
df.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model year,origin,car name
0,18.0,8,307.0,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318.0,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304.0,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302.0,140,3449,10.5,70,1,ford torino


In [4]:
print(df.shape)
print(df.columns)

(398, 9)
Index(['mpg', 'cylinders', 'displacement', 'horsepower', 'weight',
       'acceleration', 'model year', 'origin', 'car name'],
      dtype='object')


In [5]:
df['horsepower'] = pd.to_numeric(df['horsepower'],errors="coerce")
df.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model year,origin,car name
0,18.0,8,307.0,130.0,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,165.0,3693,11.5,70,1,buick skylark 320
2,18.0,8,318.0,150.0,3436,11.0,70,1,plymouth satellite
3,16.0,8,304.0,150.0,3433,12.0,70,1,amc rebel sst
4,17.0,8,302.0,140.0,3449,10.5,70,1,ford torino


In [6]:
df.isnull().sum()

Unnamed: 0,0
mpg,0
cylinders,0
displacement,0
horsepower,6
weight,0
acceleration,0
model year,0
origin,0
car name,0


In [7]:
df['horsepower'] = df['horsepower'].fillna(df['horsepower'].mean())

In [8]:
df.isnull().sum()

Unnamed: 0,0
mpg,0
cylinders,0
displacement,0
horsepower,0
weight,0
acceleration,0
model year,0
origin,0
car name,0


In [9]:
df = df.drop(['car name'],axis = 1)

In [10]:
df.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model year,origin
0,18.0,8,307.0,130.0,3504,12.0,70,1
1,15.0,8,350.0,165.0,3693,11.5,70,1
2,18.0,8,318.0,150.0,3436,11.0,70,1
3,16.0,8,304.0,150.0,3433,12.0,70,1
4,17.0,8,302.0,140.0,3449,10.5,70,1


In [11]:
X =df.drop('mpg',axis = 1)
Y =df[['mpg']]

In [12]:
X.head()

Unnamed: 0,cylinders,displacement,horsepower,weight,acceleration,model year,origin
0,8,307.0,130.0,3504,12.0,70,1
1,8,350.0,165.0,3693,11.5,70,1
2,8,318.0,150.0,3436,11.0,70,1
3,8,304.0,150.0,3433,12.0,70,1
4,8,302.0,140.0,3449,10.5,70,1


In [13]:
Y.head()

Unnamed: 0,mpg
0,18.0
1,15.0
2,18.0
3,16.0
4,17.0


In [14]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

In [15]:
x_train,x_test,y_train,y_test = train_test_split(X,Y,test_size=.2)

pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('regressor', LinearRegression())
])

pipeline.fit(X, Y)

In [16]:
y_pred = pipeline.predict(x_test)

In [17]:
from sklearn.metrics import r2_score

r2 = r2_score(y_test, y_pred)
print(f"R-squared: {r2}")

R-squared: 0.798097656723562


In [18]:
from sklearn.metrics import mean_squared_error

mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")

Mean Squared Error: 11.919869846413283


In [19]:
print(pipeline.predict([[5,117,100,3000,15,85,1]]))

[[28.04635132]]




In [20]:
import pickle
with open('model.pkl', 'wb') as file:
    pickle.dump(pipeline, file)

In [21]:
model = pickle.load(open("model.pkl",'rb'))

In [22]:
print(model)

Pipeline(steps=[('scaler', StandardScaler()),
                ('regressor', LinearRegression())])


In [23]:
!pip install flask-ngrok



In [24]:
#!wget https://bin.equinox.io/c/bNyj1mQVY4c/ngrok-v3-stable-linux-amd64.tgz
!tar -xvzf ngrok-v3-stable-linux-amd64.tgz
!ls

ngrok
model.pkl  ngrok  ngrok-v3-stable-linux-amd64.tgz  ngrok.yml  sample_data


In [25]:
!./ngrok config add-authtoken 2kgvAsjzTWXXdxM2SkphjHXQGHF_3REmfdXUP1b845msy8dgV

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [26]:
!cp /root/.config/ngrok/ngrok.yml .

In [27]:
#!pip install flask-ngrok
from flask import Flask,request
#from flask_ngrok import run_with_ngrok
import threading
import requests
import numpy as np
app = Flask(__name__)
#run_with_ngrok(app)

@app.route('/predict',methods=['POST'])
def home():
    cylinders    = int(request.args.get('cylinders',''))
    displacement = int(request.args.get('displacement',''))
    horsepower   = int(request.args.get('horsepower',''))
    weight       = int(request.args.get('weight',''))
    acceleration = int(request.args.get('acceleration',''))
    modelYear    = int(request.args.get('modelYear',''))
    origin       = int(request.args.get('origin',''))
    prediction   = model.predict([[cylinders,displacement,horsepower,weight,acceleration,modelYear,origin]])
    print("*****************************")
    print(prediction)
    return "Prediction is "+str(prediction[0])

def run():
    app.run(host='0.0.0.0', port=5000)

# Start Flask in a separate thread
thread = threading.Thread(target=run)
thread.start()

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://172.28.0.12:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m


In [29]:
import requests

# Replace with the ngrok public URL printed when you run the Flask app
ngrok_url = "http://0.0.0.0:5000/predict"  # e.g., "http://abc123.ngrok.io/predict"

# Prepare the request parameters
params = {
    'cylinders': 5,
    'displacement': 117,
    'horsepower': 100,
    'weight': 3000,
    'acceleration': 15,
    'modelYear': 85,
    'origin': 1
}

# Make the POST request
response = requests.post(ngrok_url, params=params)

# Print the response from the server
print(response.text)

INFO:werkzeug:127.0.0.1 - - [15/Aug/2024 11:15:16] "POST /predict?cylinders=5&displacement=117&horsepower=100&weight=3000&acceleration=15&modelYear=85&origin=1 HTTP/1.1" 200 -


*****************************
[[28.04635132]]
Prediction is [28.04635132]
