In [60]:
import mysql_connection
import pandas as pd

# Function to convert string representation of overs to decimal
def convert_to_decimal(overs_str):
    """Convert cricket "overs.balls" notation into a decimal number of overs.

    In this notation the digits after the dot are a count of balls, not a
    decimal fraction, so "3.3" means 3 overs and 3 balls = 3.5 overs.

    Args:
        overs_str: Overs in "overs.balls" notation. Usually a string, but
            numeric values (int, float, Decimal) are accepted too and are
            converted through their text form, so 3.3 is read as "3.3".

    Returns:
        An int when the value has no dot, otherwise a float equal to
        overs + balls / 6.

    Raises:
        ValueError: If the overs or balls part is not an integer literal.
    """
    # Database drivers may hand back numbers rather than strings; a membership
    # test such as `'.' in 3.3` would raise TypeError, so normalise to text.
    text = overs_str if isinstance(overs_str, str) else str(overs_str)
    whole, dot, balls = text.strip().partition('.')

    if not dot:
        return int(whole)

    # A trailing dot ("4.") carries no balls; treat it as zero instead of
    # failing on int('').
    ball_count = int(balls) if balls else 0
    return int(whole) + ball_count / 6

def fetch_data(team, opposite_team, players, filter_field):
    """Fetch per-match bowling rows for each player and add a per-over ratio.

    For every name in ``players`` the function queries ``cricket_info.bowling``
    joined to ``cricket_info.master`` on ``match_id``, builds a DataFrame and
    adds a ``ratio`` column: the ``filter_field`` value of each row divided by
    the total overs found for that row's ``match_id`` in the same result set.

    Args:
        team: Value matched exactly against ``b.team``.
        opposite_team: Value matched exactly against ``b.opposite_team``, or
            the literal 'All' to skip the opponent filter.
        players: Iterable of player names; each is matched with
            ``LIKE '%name%'``.
        filter_field: Name of the ``bowling`` column to select and divide by
            overs (e.g. 'runs'). Must be a plain ASCII identifier because it
            is placed into the SQL text, where it cannot be bound as a
            parameter.

    Returns:
        A list with one DataFrame per player (columns: match_id, team,
        opposite_team, player, overs, <filter_field>, date, ratio), or None
        when no database connection could be obtained.

    Raises:
        ValueError: If ``filter_field`` is not a plain ASCII identifier.
    """
    # Column names cannot be sent as bound parameters, so reject anything that
    # is not a bare identifier before it reaches the SQL string.
    if not (
        isinstance(filter_field, str)
        and filter_field.isascii()
        and filter_field.isidentifier()
    ):
        raise ValueError(f"Invalid filter_field: {filter_field!r}")

    connection = mysql_connection.get_mysql_connection()
    if not connection:
        print("Connection to the database failed.")
        return None

    match_all_opponents = opposite_team == 'All'
    if match_all_opponents:
        where_clause = "b.team = %s AND b.player LIKE %s"
    else:
        where_clause = "b.team = %s AND b.opposite_team = %s AND b.player LIKE %s"

    # One query template serves both cases; only the WHERE clause differs.
    sql_query = f"""
    SELECT DISTINCT b.match_id, b.team, b.opposite_team, b.player, b.overs, b.{filter_field}, m.date
    FROM cricket_info.bowling AS b
    JOIN cricket_info.master AS m ON b.match_id = m.match_id
    WHERE {where_clause};
    """
    columns = ['match_id', 'team', 'opposite_team', 'player', 'overs', filter_field, 'date']

    dfs = []
    try:
        for player in players:
            name_pattern = f'%{player}%'
            if match_all_opponents:
                params = (team, name_pattern)
            else:
                params = (team, opposite_team, name_pattern)

            cursor = connection.cursor()
            try:
                cursor.execute(sql_query, params)
                rows = cursor.fetchall()
            finally:
                # Release the cursor even if the query fails.
                cursor.close()

            df = pd.DataFrame(rows, columns=columns)

            # Print the column names
            print(f"Columns for player {player}: {df.columns}")

            # Convert the selected filter column to float
            df[filter_field] = df[filter_field].astype(float)

            # Convert "overs.balls" notation to decimal overs
            df['overs'] = df['overs'].apply(convert_to_decimal)

            # Total overs per match, broadcast back onto each row in order
            total_overs_per_row = df.groupby('match_id')['overs'].transform('sum')

            # Ratio of the filter field to the total overs of the row's match
            df['ratio'] = df[filter_field] / total_overs_per_row

            dfs.append(df)
    finally:
        # Always hand the connection back, including on errors mid-loop.
        connection.close()

    return dfs


if __name__ == "__main__":
    team = 'New Zealand'
    # 'All' disables the opponent filter; use a team name such as 'Sri Lanka'
    # to restrict the query to matches against that side.
    opposite_team = 'All'
    players = ['Tim Southee', 'Ish Sodhi']
    filter_field = 'runs'

    # Fetch data for multiple players
    dfs = fetch_data(team, opposite_team, players, filter_field)

    # fetch_data returns None when the connection fails, and an empty list when
    # no players were given; pd.concat would fail on either with an unhelpful
    # error, so stop here with a clear message.
    if not dfs:
        raise RuntimeError("No bowling data was fetched; cannot build master_df.")

    # Merge all players' DataFrames into master_df
    master_df = pd.concat(dfs, ignore_index=True)

    master_df['date'] = pd.to_datetime(master_df['date'])

    # Sort DataFrame by 'date'
    master_df = master_df.sort_values(by='date', ascending=True)

    # Kept inside the guard so master_df is only referenced once it exists.
    print(master_df)

Connected to MySQL database
Columns for player Tim Southee: Index(['match_id', 'team', 'opposite_team', 'player', 'overs', 'runs', 'date'], dtype='object')
Columns for player Ish Sodhi: Index(['match_id', 'team', 'opposite_team', 'player', 'overs', 'runs', 'date'], dtype='object')
        match_id         team opposite_team       player     overs  runs  \
35     T20I # 53  New Zealand       England  Tim Southee  4.000000  38.0   
80     T20I # 54  New Zealand       England  Tim Southee  4.000000  22.0   
77     T20I # 78  New Zealand   West Indies  Tim Southee  4.000000  39.0   
37     T20I # 79  New Zealand   West Indies  Tim Southee  4.000000  44.0   
79     T20I # 83  New Zealand     Australia  Tim Southee  4.000000  31.0   
..           ...          ...           ...          ...       ...   ...   
113  T20I # 2438  New Zealand      Pakistan  Tim Southee  4.000000  19.0   
220  T20I # 2438  New Zealand      Pakistan    Ish Sodhi  4.000000  22.0   
1    T20I # 2481  New Zealand     

In [61]:
from bowling_linear_regression_model import predict_player_performance

# Requires 'master_df' from the data-loading cell to be defined already.
player_name = 'Tim Southee'
# The recorded output shows the returned value equals the "Mean Squared Error"
# the function prints, so it is labelled as an MSE rather than a predicted ratio.
# NOTE(review): the recorded MSE is ~8e-31, i.e. effectively zero; this may mean
# the target is directly derivable from the model's features - confirm in
# bowling_linear_regression_model.
mse = predict_player_performance(player_name, master_df)
print('Linear Regression MSE:', mse)


Mean Squared Error: 8.217301096052207e-31
linear Regression Predicted Ratio: 8.217301096052207e-31


In [68]:
from bowling_random_forest_model import predict_player_performance

# Requires 'master_df' from the data-loading cell to be defined already.
player_name = 'Tim Southee'
# The recorded output shows the returned value equals the "Mean Squared Error"
# the function prints, so it is labelled as an MSE rather than a predicted ratio.
mse = predict_player_performance(player_name, master_df)
print('Random Forest MSE:', mse)


Mean Squared Error: 0.012215273502139426
Random ForestPredicted Ratio: 0.012215273502139426


In [69]:
from bowling_SVR_model import predict_player_performance

# Requires 'master_df' from the data-loading cell to be defined already.
player_name = 'Tim Southee'
# The recorded output shows the returned value equals the "Mean Squared Error"
# the function prints, so it is labelled as an MSE rather than a predicted ratio.
mse = predict_player_performance(player_name, master_df)
print('SVR MSE:', mse)


Mean Squared Error: 0.002759532336682627
SVR Predicted Ratio: 0.002759532336682627


In [67]:
from bowling_decision_trees_model import predict_player_performance

# Requires 'master_df' from the data-loading cell to be defined already.
player_name = 'Tim Southee'
# NOTE(review): the recorded run of this cell failed with
# "ValueError: Unknown label type" on an array of continuous ratio values.
# The error comes from inside bowling_decision_trees_model, not from this cell;
# presumably a classifier is being fitted on a continuous target and a
# regressor is needed instead - confirm in that module.
# Labelled as an MSE for consistency with the variable name; the return value
# could not be observed because the recorded run raised.
mse = predict_player_performance(player_name, master_df)
print('Decision Trees MSE:', mse)


ValueError: Unknown label type: (array([ 2.76923077,  3.        ,  3.33333333,  3.75      ,  4.        ,
        4.5       ,  5.        ,  5.25      ,  5.5       ,  5.71428571,
        6.        ,  6.25      ,  6.5       ,  7.        ,  7.25      ,
        7.5       ,  7.57894737,  7.75      ,  8.        ,  8.33333333,
        8.4       ,  8.5       ,  8.57142857,  8.72727273,  8.75      ,
        8.85714286,  9.        ,  9.25      ,  9.5       ,  9.75      ,
       10.        , 10.5       , 10.75      , 10.8       , 11.        ,
       11.2173913 , 11.25      , 11.5       , 11.75      , 12.        ,
       12.25      , 13.        , 15.        , 16.        ]),)