Нужно реализовать REST API на базе Flask в Google Colab.

1. Выбрать себе датасет (который интересен или нравится больше всего, можно глянуть здесь https://economic-caper-a4c.notion.site/d062c410b90145bca90fc23b1348c813), сделать pipeline (преобразования + модель), сохранить его на диск. Если не хочется пайплайн, то можно без него, но так вам же будет удобнее потом вызывать его из кода сервиса.
2. Реализовать ноутбук с сервером
3. Реализовать ноутбук с клиентом

In [1]:
!pip install dill



In [2]:
import pandas as pd
import dill
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.metrics import roc_auc_score, roc_curve, precision_recall_curve
from sklearn.metrics import f1_score

#working with text
from sklearn.feature_extraction.text import TfidfVectorizer

#normalizing data
from sklearn.preprocessing import StandardScaler

#pipeline
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.metrics import precision_score,recall_score

#imputer
from sklearn.impute import SimpleImputer

import sklearn.datasets

In [3]:
df = pd.read_csv("./train.csv")
df.head(5)

Unnamed: 0,Id,Title,Body,Tags,CreationDate,Y
0,34552656,Java: Repeat Task Every Random Seconds,<p>I'm already familiar with repeating tasks e...,<java><repeat>,2016-01-01 00:21:59,LQ_CLOSE
1,34553034,Why are Java Optionals immutable?,<p>I'd like to understand why Java 8 Optionals...,<java><optional>,2016-01-01 02:03:20,HQ
2,34553174,Text Overlay Image with Darkened Opacity React...,<p>I am attempting to overlay a title over an ...,<javascript><image><overlay><react-native><opa...,2016-01-01 02:48:24,HQ
3,34553318,Why ternary operator in swift is so picky?,"<p>The question is very simple, but I just cou...",<swift><operators><whitespace><ternary-operato...,2016-01-01 03:30:17,HQ
4,34553755,hide/show fab with scale animation,<p>I'm using custom floatingactionmenu. I need...,<android><material-design><floating-action-but...,2016-01-01 05:21:48,HQ


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45000 entries, 0 to 44999
Data columns (total 6 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Id            45000 non-null  int64 
 1   Title         45000 non-null  object
 2   Body          45000 non-null  object
 3   Tags          45000 non-null  object
 4   CreationDate  45000 non-null  object
 5   Y             45000 non-null  object
dtypes: int64(1), object(5)
memory usage: 2.1+ MB


In [5]:
df['Y'].value_counts()

LQ_CLOSE    15000
HQ          15000
LQ_EDIT     15000
Name: Y, dtype: int64

In [6]:
# Collapse the three labels into a binary target: 0 for 'HQ', 1 for every
# other label (LQ_CLOSE and LQ_EDIT in this data set).
df['Y'] = df['Y'].apply(lambda x: 0 if x == 'HQ' else 1)

In [7]:
df['Y'].value_counts()

1    30000
0    15000
Name: Y, dtype: int64

In [8]:
# Hold out 33% of the rows for testing; random_state pins the shuffle.
# NOTE: the whole frame is passed as X, so X_train / X_test (and the CSV files
# written below) still contain the target column 'Y'.
X_train, X_test, y_train, y_test = train_test_split(df, df['Y'], test_size=0.33, random_state=42)
# save test
X_test.to_csv("X_test.csv", index=None)
y_test.to_csv("y_test.csv", index=None)

# save train
X_train.to_csv("X_train.csv", index=None)
y_train.to_csv("y_train.csv", index=None)

In [9]:
class ColumnSelector(BaseEstimator, TransformerMixin):
    """
    Stateless transformer that pulls one column (``key``) out of a data frame
    so that the following pipeline steps operate on that column alone.
    """
    def __init__(self, key):
        self.key = key

    def fit(self, X, y=None):
        # Nothing is learned from the data; fitting is a no-op.
        return self

    def transform(self, X):
        column = X[self.key]
        return column
    

class TextImputer(BaseEstimator, TransformerMixin):
    """
    Transformer that replaces missing values in a single data frame column
    with a constant.

    key: label of the column to fill.
    value: replacement used for missing entries of that column.
    """
    def __init__(self, key, value):
        self.key = key
        self.value = value

    def fit(self, X, y=None):
        # Nothing is learned from the data; fitting is a no-op.
        return self

    def transform(self, X):
        """Return a copy of X whose ``key`` column has missing values filled."""
        # Fill on a copy: assigning into the caller's frame would silently
        # modify the data passed to fit/predict and triggers pandas
        # chained-assignment warnings when X is a slice of another frame.
        X = X.copy()
        X[self.key] = X[self.key].fillna(self.value)
        return X

In [10]:
features = ['Title', 'Body', 'Tags']
target = 'Y'

In [11]:
# combine
Title = Pipeline([
                ('imputer', TextImputer('Title', '')),
                ('selector', ColumnSelector(key='Title')),
                ('tfidf', TfidfVectorizer())
            ])

Body = Pipeline([
                ('imputer', TextImputer('Body', '')),
                ('selector', ColumnSelector(key='Body')),
                ('tfidf', TfidfVectorizer())
            ])

Tags = Pipeline([
                ('imputer', TextImputer('Tags', '')),
                ('selector', ColumnSelector(key='Tags')),
                ('tfidf', TfidfVectorizer())
            ])


feats = FeatureUnion([('Title', Title),
                      ('Body', Body),
                      ('Tags', Tags)])

In [12]:
%%time

pipeline = Pipeline([
    ('features', feats),
    ('classifier', LogisticRegression()),
])

pipeline.fit(X_train, y_train)

CPU times: total: 14.2 s
Wall time: 11.7 s


Pipeline(steps=[('features',
                 FeatureUnion(transformer_list=[('Title',
                                                 Pipeline(steps=[('imputer',
                                                                  TextImputer(key='Title',
                                                                              value='')),
                                                                 ('selector',
                                                                  ColumnSelector(key='Title')),
                                                                 ('tfidf',
                                                                  TfidfVectorizer())])),
                                                ('Body',
                                                 Pipeline(steps=[('imputer',
                                                                  TextImputer(key='Body',
                                                                              value='')),
   

In [13]:
pipeline.steps

[('features',
  FeatureUnion(transformer_list=[('Title',
                                  Pipeline(steps=[('imputer',
                                                   TextImputer(key='Title',
                                                               value='')),
                                                  ('selector',
                                                   ColumnSelector(key='Title')),
                                                  ('tfidf', TfidfVectorizer())])),
                                 ('Body',
                                  Pipeline(steps=[('imputer',
                                                   TextImputer(key='Body',
                                                               value='')),
                                                  ('selector',
                                                   ColumnSelector(key='Body')),
                                                  ('tfidf', TfidfVectorizer())])),
                       

In [14]:
with open("logreg_pipeline.dill", "wb") as f:
    dill.dump(pipeline, f)

In [15]:
X_test = pd.read_csv("X_test.csv")
y_test = pd.read_csv("y_test.csv")

In [16]:
X_test.head(3)

Unnamed: 0,Id,Title,Body,Tags,CreationDate,Y
0,54233203,not able to run function in php,<p>I'am not able to run a function in php oops...,<php><oop>,2019-01-17 09:54:10,1
1,39253307,Export GIT LOG into an Excel file,"<p>I have looked into the forum, but with no l...",<git><logging>,2016-08-31 15:13:57,0
2,58793066,How to summarize the employees by net revenue ...,8.\tThe sales director would like to reward th...,<sql>,2019-11-10 21:25:11,1


In [17]:
with open('logreg_pipeline.dill', 'rb') as in_strm:
    pipeline = dill.load(in_strm)

In [18]:
pipeline

Pipeline(steps=[('features',
                 FeatureUnion(transformer_list=[('Title',
                                                 Pipeline(steps=[('imputer',
                                                                  TextImputer(key='Title',
                                                                              value='')),
                                                                 ('selector',
                                                                  ColumnSelector(key='Title')),
                                                                 ('tfidf',
                                                                  TfidfVectorizer())])),
                                                ('Body',
                                                 Pipeline(steps=[('imputer',
                                                                  TextImputer(key='Body',
                                                                              value='')),
   

In [19]:
preds = pipeline.predict_proba(X_test)[:, 1]

pred_df = pd.DataFrame({'preds': preds})
pred_df.to_csv("test_predictions.csv", index=None)

In [20]:
preds[:10]

array([0.98797278, 0.32500596, 0.97923746, 0.8829708 , 0.95554451,
       0.91716488, 0.47180894, 0.06637664, 0.84354527, 0.97909931])

In [21]:
precision, recall, thresholds = precision_recall_curve(y_test, preds)

# F-score for every point of the curve. Where precision + recall == 0 the
# score is set to 0 instead of NaN, because np.argmax would otherwise select
# the NaN entry.
denominator = precision + recall
fscore = np.divide(2 * precision * recall, denominator,
                   out=np.zeros_like(denominator), where=denominator > 0)
# locate the index of the largest f score; the last precision/recall pair has
# no matching entry in `thresholds`, so it is excluded to keep `ix` in range
ix = np.argmax(fscore[:-1])
print(f'Best Threshold={thresholds[ix]}, F-Score={fscore[ix]:.3f}, Precision={precision[ix]:.3f}, Recall={recall[ix]:.3f}')

Best Threshold=0.4690497980679384, F-Score=0.933, Precision=0.922, Recall=0.944


In [22]:
!pip3 install flask-ngrok



In [23]:
from flask import Flask, request, jsonify

In [24]:
# Minimal Flask application used to check that the server starts at all.
app = Flask(__name__)
#run_with_ngrok(app)  # Start ngrok when app is run

@app.route("/a")
def hello():
    """Smoke-test route: answer requests for /a with a fixed greeting."""
    return "Hello World!"

if __name__ == '__main__':
    # Start Flask's built-in server on port 8082.
    app.run(port=8082)

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


 * Running on http://127.0.0.1:8082/ (Press CTRL+C to quit)


In [25]:
with open('logreg_pipeline.dill', 'rb') as in_strm:
    model = dill.load(in_strm)

In [26]:
X_test = pd.read_csv("X_test.csv")
y_test = pd.read_csv("y_test.csv")

In [27]:
# Fresh application object for the prediction service (rebinds `app`).
app = Flask(__name__)

@app.route("/", methods=["GET"])
def general():
    """Index route: return a fixed welcome message for GET /."""
    return "Welcome to prediction process"

@app.route('/predict', methods=['POST'])
def predict():
    """Score one question with the loaded model.

    Expects a JSON object with optional fields "Title", "Body" and "Tags".
    A field that is missing, null or empty is scored as an empty string.

    Returns a JSON object with:
      "success"     - True when a prediction was made;
      "predictions" - second column of ``model.predict_proba`` for the request;
      "description" - the title that was scored.
    When the request body is not a JSON object, responds with HTTP 400 and
    ``{"success": False, "error": ...}``.
    """
    data = {"success": False}

    # silent=True yields None instead of raising on a missing/invalid body,
    # so the problem can be reported in the same JSON shape as a success.
    request_json = request.get_json(silent=True)
    if not isinstance(request_json, dict):
        data["error"] = "request body must be a JSON object"
        return jsonify(data), 400

    # Read each field into the variable that is actually fed to the model;
    # `or ""` maps missing / null / empty values to an empty string, and
    # str() keeps the text vectorizers working for non-string JSON values.
    Title = str(request_json.get("Title") or "")
    Body = str(request_json.get("Body") or "")
    Tags = str(request_json.get("Tags") or "")

    print(Title)
    preds = model.predict_proba(pd.DataFrame({"Title": [Title],
                                              "Body": [Body],
                                              "Tags": [Tags]}))
    # Convert to a plain float so the value is JSON serializable.
    data["predictions"] = float(preds[:, 1][0])
    data["description"] = Title
    # indicate that the request was a success
    data["success"] = True
    print('OK')

    # return the data dictionary as a JSON response
    return jsonify(data)


if __name__ == '__main__':
    app.run(port=8082)

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


 * Running on http://127.0.0.1:8082/ (Press CTRL+C to quit)
127.0.0.1 - - [20/Jul/2023 22:43:38] "POST //predict HTTP/1.1" 200 -


How to summarize the employees by net revenue
OK


127.0.0.1 - - [20/Jul/2023 22:43:39] "POST //predict HTTP/1.1" 200 -
127.0.0.1 - - [20/Jul/2023 22:43:39] "POST //predict HTTP/1.1" 200 -
127.0.0.1 - - [20/Jul/2023 22:43:39] "POST //predict HTTP/1.1" 200 -
127.0.0.1 - - [20/Jul/2023 22:43:39] "POST //predict HTTP/1.1" 200 -
127.0.0.1 - - [20/Jul/2023 22:43:39] "POST //predict HTTP/1.1" 200 -
127.0.0.1 - - [20/Jul/2023 22:43:39] "POST //predict HTTP/1.1" 200 -
127.0.0.1 - - [20/Jul/2023 22:43:39] "POST //predict HTTP/1.1" 200 -
127.0.0.1 - - [20/Jul/2023 22:43:39] "POST //predict HTTP/1.1" 200 -
127.0.0.1 - - [20/Jul/2023 22:43:39] "POST //predict HTTP/1.1" 200 -
127.0.0.1 - - [20/Jul/2023 22:43:39] "POST //predict HTTP/1.1" 200 -
127.0.0.1 - - [20/Jul/2023 22:43:39] "POST //predict HTTP/1.1" 200 -
127.0.0.1 - - [20/Jul/2023 22:43:39] "POST //predict HTTP/1.1" 200 -
127.0.0.1 - - [20/Jul/2023 22:43:39] "POST //predict HTTP/1.1" 200 -
127.0.0.1 - - [20/Jul/2023 22:43:39] "POST //predict HTTP/1.1" 200 -
127.0.0.1 - - [20/Jul/2023 22:43:3

How to get all the child records from different tables based on given parent ID in sql server
OK
How to get all the child records from different tables based on given parent ID in sql server
OK
Retrieve all except some data of the another table
OK
Pandas: read_html
OK
Reader Always gimme NULL
OK
php rearrange array elements based on condition
OK
How do I make a constructor for a derived class?
OK
how can i create a dynamic tow dimensional array in c++?
OK
Re-exporting ES6 modules in TS 1.7?
OK
Fetch API with Cookie
OK
Print list content in a given order
OK
c# - List all primes upto 100
OK
Angular2 exception: Token must be defined
OK
Form Validation project
OK
Most Pythonic way to kill a thread after some period of time
OK
Gulp error internal/child_process.js:298 throw errnoException(err, 'spawn'); Error: spawn EACCES
OK


127.0.0.1 - - [20/Jul/2023 22:43:39] "POST //predict HTTP/1.1" 200 -
127.0.0.1 - - [20/Jul/2023 22:43:39] "POST //predict HTTP/1.1" 200 -
127.0.0.1 - - [20/Jul/2023 22:43:39] "POST //predict HTTP/1.1" 200 -
127.0.0.1 - - [20/Jul/2023 22:43:39] "POST //predict HTTP/1.1" 200 -
127.0.0.1 - - [20/Jul/2023 22:43:39] "POST //predict HTTP/1.1" 200 -
127.0.0.1 - - [20/Jul/2023 22:43:39] "POST //predict HTTP/1.1" 200 -
127.0.0.1 - - [20/Jul/2023 22:43:39] "POST //predict HTTP/1.1" 200 -
127.0.0.1 - - [20/Jul/2023 22:43:39] "POST //predict HTTP/1.1" 200 -
127.0.0.1 - - [20/Jul/2023 22:43:39] "POST //predict HTTP/1.1" 200 -
127.0.0.1 - - [20/Jul/2023 22:43:39] "POST //predict HTTP/1.1" 200 -
127.0.0.1 - - [20/Jul/2023 22:43:39] "POST //predict HTTP/1.1" 200 -
127.0.0.1 - - [20/Jul/2023 22:43:39] "POST //predict HTTP/1.1" 200 -
127.0.0.1 - - [20/Jul/2023 22:43:39] "POST //predict HTTP/1.1" 200 -
127.0.0.1 - - [20/Jul/2023 22:43:39] "POST //predict HTTP/1.1" 200 -
127.0.0.1 - - [20/Jul/2023 22:43:3

Filter Name with Starting Letter in C#
OK
Django ImageField upload_to path
OK
Compiling SASS in Windows 7
OK
to get or set the values of controls of a form froma class file
OK
i am new to pythn and was trying to fix indentation error : this is my code class Cylinder(object):
OK
I cant UPDATE datetime to MySQL
OK
How to correctly share JAX-RS 2.0 client
OK
argument of type'NoneType' is not itrable
OK
I am getting a StringIndexOutOfBoundsException when I attempt to use array parts from the children classes
OK
In python-telegram-bot how to get all participants of the group?
OK
GIT Split Repository directory preserving *move / renames* history
OK
React Native Post Request via Fetch throws Network Request Failed
OK
What is the mathematical definition of (f(n)) and O(f(n))
OK
Turn Android into USB host
OK
I want to insert a couple of sentences into an array
OK
Elasticsearch Bulk API - Index vs Create/Update
OK
SockJS Python Client
OK
Simple console graphics in C
OK
Hover effect is backward f

127.0.0.1 - - [20/Jul/2023 22:43:39] "POST //predict HTTP/1.1" 200 -
127.0.0.1 - - [20/Jul/2023 22:43:39] "POST //predict HTTP/1.1" 200 -
127.0.0.1 - - [20/Jul/2023 22:43:39] "POST //predict HTTP/1.1" 200 -
127.0.0.1 - - [20/Jul/2023 22:43:39] "POST //predict HTTP/1.1" 200 -
127.0.0.1 - - [20/Jul/2023 22:43:39] "POST //predict HTTP/1.1" 200 -
127.0.0.1 - - [20/Jul/2023 22:43:39] "POST //predict HTTP/1.1" 200 -
127.0.0.1 - - [20/Jul/2023 22:43:39] "POST //predict HTTP/1.1" 200 -
127.0.0.1 - - [20/Jul/2023 22:43:39] "POST //predict HTTP/1.1" 200 -
127.0.0.1 - - [20/Jul/2023 22:43:39] "POST //predict HTTP/1.1" 200 -
127.0.0.1 - - [20/Jul/2023 22:43:39] "POST //predict HTTP/1.1" 200 -
127.0.0.1 - - [20/Jul/2023 22:43:39] "POST //predict HTTP/1.1" 200 -
127.0.0.1 - - [20/Jul/2023 22:43:39] "POST //predict HTTP/1.1" 200 -
127.0.0.1 - - [20/Jul/2023 22:43:39] "POST //predict HTTP/1.1" 200 -
127.0.0.1 - - [20/Jul/2023 22:43:39] "POST //predict HTTP/1.1" 200 -



OK
Using R and plot.ly - how do I script saving my output as a webpage
OK
Does return ends function if ifelse is used?
OK
Why do we need field if we have property?
OK
Safe navigation operator (&.) for nil
OK
Fastest way to convert a integer to arbitrarily ordered byte arrays in JavaScript?
OK
Planning to make web app like Canva
OK
Cannot find alphabet symbol
OK
What is the easiest way to use material design in a react-native iOS app?
OK
cross or one to many in c#
OK
Need Help UnPIckling as String on Python
OK
How to log to journald (systemd) via Python?
OK
Python: how to capture image from webcam on click using OpenCV
OK
i have json file on local computer i want to use that file in javascript how can i use that in js
OK


In [28]:
# Sample question used to exercise the /predict endpoint.
Title_data, Body_data, Tags_data = (
    "How to summarize the employees by net revenue",
    "My data is look like below (usually more than",
    "<sql><sql-server>"
)

# Request payload: use the sample strings defined above. The bare names
# Title / Body / Tags are bound to the sklearn sub-pipelines earlier in the
# notebook and cannot be serialized to JSON.
body = {
        'Title': Title_data,
        'Body': Body_data,
        'Tags': Tags_data
        }

In [None]:
# Call the /predict route in-process through Flask's test client and decode
# the JSON response.
with app.test_client() as t:
    response = t.post('/predict', json=body)
    json_data = response.get_json()

# Last expression of the cell: display the decoded response.
json_data
