In [1]:
#!pip install flask_sqlalchemy

In [1]:
import json
import math
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import psycopg2
from flask import Flask
from flask import request,jsonify
from flask_sqlalchemy import SQLAlchemy
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score,mean_squared_error,accuracy_score,confusion_matrix   #r2_score is a score on discrete values
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,LabelEncoder
from sklearn.tree import DecisionTreeRegressor
from sqlalchemy import create_engine

In [2]:
# Connection URL for the cricket database. Set CRICKET_DB_URL to point at another
# server or to keep real credentials out of the notebook; the default below is the
# local development database this notebook was written against.
db_string = os.environ.get('CRICKET_DB_URL', 'postgresql://postgres:123456@localhost:5432/cricketalpha')
# SQLAlchemy 1.4+ no longer accepts the legacy "postgres://" scheme, so normalise it.
if db_string.startswith('postgres://'):
    db_string = 'postgresql://' + db_string[len('postgres://'):]
db = create_engine(db_string)

In [11]:
def predict_method(features_train,features_test,labels_train,labels_test,model):
    """Fit ``model`` on the training split and score it on the test split.

    Parameters
    ----------
    features_train, labels_train
        Inputs and targets handed to ``model.fit``.
    features_test
        Inputs handed to ``model.predict``.
    labels_test
        Expected targets the predictions are compared against.
    model
        Estimator exposing ``fit`` and ``predict``; it is fitted in place.

    Returns
    -------
    tuple
        ``(r2, predictions, model)`` where ``r2`` is ``r2_score(labels_test,
        predictions)`` and ``model`` is the same object that was passed in.
    """
    model.fit(features_train, labels_train)
    predictions = model.predict(features_test)
    score = r2_score(labels_test, predictions)
    return score, predictions, model

In [12]:
# Flask application object; the /teamscoringruns route below is registered on it.
app = Flask(__name__)

# A head-to-head history of this many matches or fewer is too small to train on.
SPARSE_HISTORY_LIMIT = 20
# A prediction is only returned when the hold-out R^2 reaches this value.
MIN_R2_SCORE = 0.40
# Fields the JSON request body must carry.
REQUIRED_FIELDS = ("team_one", "team_two", "match_type", "venue_id")
# Runs scored in inning 1 of every match where team_one batted first against team_two.
# The %(name)s placeholders are bound by the database driver (psycopg2), so request
# values are never spliced into the SQL text.
TEAM_RUNS_QUERY = (
    "select m.match_id,m.match_type,innings_one_team,innings_two_team,venue_id,"
    "sum(d.total_runs) as runs_scored "
    "from delivery as d inner join match as m ON d.match_id =m.match_id "
    "where innings_one_team = %(team_one)s and innings_two_team = %(team_two)s and d.inning =1 "
    "group by m.match_type,m.match_id,innings_one_team,innings_two_team,venue_id;"
)


@app.route("/teamscoringruns", methods=["POST"])
def team_runs_score_against_team():
    """Predict the first-innings total of ``team_one`` against ``team_two``.

    Expects a JSON body with ``team_one``, ``team_two``, ``match_type`` and
    ``venue_id``. The historical first-innings totals for that pairing are read
    from the database, a random forest is trained on them for this request, and
    the model is asked for the total of the requested fixture.

    Returns a JSON object that always contains ``status`` and ``message``:

    * malformed or incomplete body: ``status`` 400 (also sent as HTTP 400);
    * no matches found for the pairing: ``status`` 400;
    * ``SPARSE_HISTORY_LIMIT`` matches or fewer: ``status`` 200, no prediction;
    * hold-out R^2 below ``MIN_R2_SCORE``: ``status`` 200 plus ``r2score``;
    * otherwise ``status`` 200, ``data`` with the predicted runs (as a string)
      and ``scoring_probability`` (the R^2 score as a percentage).
    """
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict) or any(payload.get(field) is None for field in REQUIRED_FIELDS):
        return jsonify({
            "status": 400,
            "message": "request body must be JSON with team_one, team_two, match_type and venue_id",
        }), 400

    # Run the query first: the guards below need the number of rows it returned.
    df = pd.read_sql_query(
        TEAM_RUNS_QUERY,
        db,
        params={"team_one": payload["team_one"], "team_two": payload["team_two"]},
    )

    if len(df) == 0:
        return jsonify({"status": 400, "message": "No result found for these teams"})
    if len(df) <= SPARSE_HISTORY_LIMIT:
        return jsonify({"status": 200, "message": "very less matches played between the teams cannot predict"})

    # Turn the match type labels into integer codes.
    # NOTE(review): the encoder is fitted on the rows returned for this pairing, so
    # the codes depend on which match types appear in them; the request's match_type
    # has to use the same codes - confirm with the API clients.
    encode = LabelEncoder()
    df['match_type'] = encode.fit_transform(df['match_type'])

    # 1-D target avoids scikit-learn's column-vector warning when fitting the forest.
    labels = df['runs_scored'].to_numpy()
    features = df.drop(['match_id','runs_scored'],axis=1)

    features_train,features_test,labels_train,labels_test = train_test_split(features,labels,test_size = 0.20)

    ##########  USING RANDOM FOREST REGRESSOR
    r2score, pred_Rf,modelRandomFr = predict_method(features_train,features_test,labels_train,labels_test,RandomForestRegressor(n_estimators = 50))
    # RMSE compares the held-out totals with the totals the model predicted for them.
    print('RandomForestRegressor RMSE: ',math.sqrt(mean_squared_error(labels_test,pred_Rf)))
    print('r2_score:',r2score)

    if r2score < MIN_R2_SCORE:
        return jsonify({"status": 200, "message": "cannot predict", "r2score": r2score})

    # Build the fixture row by column name and order it like the training features,
    # so each request value lands in the column the model learned it from.
    fixture = pd.DataFrame([{
        "match_type": payload["match_type"],
        "innings_one_team": payload["team_one"],
        "innings_two_team": payload["team_two"],
        "venue_id": payload["venue_id"],
    }])[list(features.columns)]
    result = modelRandomFr.predict(fixture)[0]

    return jsonify({
        "status": 200,
        "data": [{"prediction": str(int(result))}],
        # Despite the key name this is the hold-out R^2 expressed as a percentage.
        "scoring_probability": round(r2score*100,2),
        "message": "model predicted successfully",
    })
    
# Start Flask's built-in server on port 5200 with debug mode off, but only when
# this code is executed as the main module (which includes running the cell).
if __name__ == '__main__':
    app.run(debug=False, port=5200)

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://127.0.0.1:5200/ (Press CTRL+C to quit)
  
127.0.0.1 - - [21/Oct/2019 18:15:38] "[37mPOST /teamscoringruns HTTP/1.1[0m" 200 -


RandomForestRegressor RMSE:  62.61010137493953
r2_score: 0.6289235949639644


In [None]:
# dummy = df_ODI
# dummy = dummy.query("venue_id == 1")
# encode = LabelEncoder()
# df['match_type'] = encode.fit_transform(df['match_type'])  ### 1:- T20, 0:- ODI, 2:- Test
# df.head()

In [None]:
# df_remove_duplicate = df[['match_type','inning_one_team','inning_two_team','venue_id','runs_scored']]
# df_remove_duplicate = df_remove_duplicate.drop_duplicates()
# # df_remove_duplicate.duplicated()
# new_df = df_remove_duplicate
# new_df.head()

In [None]:
# labels = np.array(df['runs_scored']).reshape(-1,1)
# features = df.drop(['match_id','runs_scored'],axis=1)

In [None]:
# features.head()

In [None]:
# scaler = StandardScaler()  #scaler function standardize the data between the range -1 to 1
# features= scaler.fit_transform(features)
# features
# # labels = scaler.fit_transform(labels)

In [None]:
# def predict_method(features_train,features_test,labels_train,labels_test,model):
#     model.fit(features_train,labels_train) #train reandom forest regression model
#     predict_output = model.predict(features_test) # we are predicting runs_scored(labels)  /// independent variable
#     return r2_score(labels_test,predict_output),predict_output,model

In [None]:
# ##########  USING Decsion tree REGRESSOR
# r2score, pred_Decision,modeldecTree = predict_method(features_train,features_test,labels_train,labels_test,DecisionTreeRegressor())
# print('DecisionTreeRegressor RMSE: ',math.sqrt(mean_squared_error(labels_test,pred_Decision))) #labels test- predicted value, labels is expected values and predicted values are which the model predict
# print('r2_score:',r2score)

In [None]:
# ##########  USING RANDOM FOREST REGRESSOR
# r2score, pred_Rf,modelRandomFr = predict_method(features_train,features_test,labels_train,labels_test,RandomForestRegressor(n_estimators = 50))
# print('RandomForestRegressor RMSE: ',math.sqrt(mean_squared_error(labels_test,pred_Rf))) #labels test- predicted value, labels is expected values and predicted values are which the model predict
# print('r2_score:',r2score)

In [None]:
# new_df.head(35)

In [None]:
# modelRandomFr.predict([[1,1,2,1]])

In [None]:
# pred_Rf

In [None]:
# encode.classes_