In [4]:
import mysql.connector
import pandas as pd

# Define connection parameters
config = {
  'user': 'airflow',
  'password': 'airflow',
  'host': 'localhost',  # or your MySQL container IP address
  'port': '3306',        # or the port you've mapped to your MySQL container
  'database': 'cricket_info',
  'raise_on_warnings': True
}

# Establish connection
try:
    connection = mysql.connector.connect(**config)
    print("Connected to MySQL database")
except mysql.connector.Error as err:
    print(f"Error: {err}")

# Perform database operations and create DataFrame
if 'connection' in locals():
    # SQL query to select specific fields from the table for both teams
    sql_query = """
    SELECT team_1, team_1_score, team_1_wicket, team_1_over, match_date
    FROM matches
    UNION ALL
    SELECT team_2, team_2_score, team_2_wicket, team_2_over, match_date
    FROM matches
    """

    # Read data into a DataFrame
    df = pd.read_sql(sql_query, connection)

    # Rename columns
    df.columns = ['team', 'team_score', 'team_wicket', 'team_over', 'match_date']

    # Print the DataFrame
    print(df)

    # Close connection
    connection.close()
else:
    print("Connection to the database failed.")

source_df = df

Connected to MySQL database
              team  team_score  team_wicket team_over    match_date
0     South Africa         133           10        20  Oct 21, 2005
1          England         179            8        20  Jun 13, 2005
2      New Zealand         170           10        20  Feb 17, 2005
3      New Zealand         116            5      18.3  Dec 26, 2006
4      New Zealand         162            8        20  Dec 22, 2006
...            ...         ...          ...       ...           ...
5157      Zimbabwe         143            5        20  Jan 14, 2024
5158      Pakistan         173           10      19.3  Jan 14, 2024
5159      Pakistan         180           10        18  Jan 12, 2024
5160   Afghanistan         158            5        20  Jan 11, 2024
5161   Afghanistan         126            9        20   Jan 2, 2024

[5162 rows x 5 columns]


  df = pd.read_sql(sql_query, connection)


In [6]:
import pandas as pd
import matplotlib.pyplot as plt
df = source_df

# Handle alternative date format
def parse_date(date_str):
    try:
        return pd.to_datetime(date_str, format='%b %d, %Y')
    except ValueError:
        return pd.to_datetime(date_str)

# Convert 'match_date' column to datetime
df['match_date'] = df['match_date'].apply(parse_date)

# Extract year from 'match_date'
df['year'] = df['match_date'].dt.year

# Convert 'team_over' to float
df['team_over'] = df['team_over'].apply(lambda x: float(x.split('.')[0]) + float(x.split('.')[1]) / 10 if '.' in x else float(x))

# Calculate total balls
df['total_balls'] = df['team_over'] * 6

# Calculate run rate
df['run_rate'] = df['team_score'] / df['total_balls']

# Group by team and year and calculate average run rate
grouped_df = df.groupby(['team', 'year']).agg(avg_run_rate=('run_rate', 'mean')).reset_index()

# Plotting
plt.figure(figsize=(10, 6))
for team in grouped_df['team'].unique():
    team_data = grouped_df[grouped_df['team'] == team]
    plt.plot(team_data['year'], team_data['avg_run_rate'], label=team)

plt.xlabel('Year')
plt.ylabel('Average Run Rate')
plt.title('Average Run Rate Year-wise for Each Country')
plt.legend()
plt.grid(True)
plt.xticks(range(2005, 2025))  # Set x-axis ticks from 2005 to 2024
plt.xlim(2005, 2024)  # Set x-axis limits from 2005 to 2024
# Save the graph
plt.savefig('Average Run Rate Year-wise for Each Country.png')

plt.show()

TypeError: argument of type 'float' is not iterable