In [None]:
import pandas as pd

# Read the recorded match log from CSV into a DataFrame
df = pd.read_csv(filepath_or_buffer='pkmn_tcg_matches.csv')

# Preview the first five rows to sanity-check the load
df.head(n=5)

In [None]:
# Encode each game's result as a 0/1 flag:
# 1 where 'Outcome' is exactly the string 'Win', 0 for every other value
is_win = df['Outcome'].eq('Win')
df['Win'] = is_win.astype(int)

# Preview the frame with the new column
df.head(n=5)

In [None]:
# Running total of wins, accumulated in row order
running_wins = df['Win'].cumsum()
df['Cumulative Wins'] = running_wins

# Games played so far, taken as index + 1
# (this is the 1-based row number only while the index is the default 0..n-1)
df['Games Played'] = df.index + 1

# Running win ratio = wins so far / games so far
df['Win Ratio'] = running_wins.div(df['Games Played'])

# Preview the frame with the three new columns
df.head(n=5)

In [None]:
!pip install matplotlib

In [None]:
!pip install seaborn

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Draw the running win ratio against the number of games played,
# using an explicit Figure/Axes pair instead of pyplot's implicit state
fig, ax = plt.subplots(figsize=(10, 6))
sns.lineplot(data=df, x='Games Played', y='Win Ratio', marker='o', color='blue', ax=ax)

# Title and axis labels
ax.set_title('Win Ratio Over Time')
ax.set_xlabel('Number of Games Played')
ax.set_ylabel('Win Ratio')

# Render the figure
plt.show()

In [None]:
# Keep only games whose 'Deck' name contains "Lost Box" but not "Goodra".
# na=False treats a missing deck name as "no match" so the mask stays strictly boolean.
is_lost_box = df['Deck'].str.contains("Lost Box", na=False)
is_goodra = df['Deck'].str.contains("Goodra", na=False)

# .copy() makes df_lost_box an independent frame: columns are added to it below and
# in later cells, which on a bare filtered slice triggers SettingWithCopyWarning.
df_lost_box = df[is_lost_box & ~is_goodra].copy()

# Running win total counted within the Lost Box games only
df_lost_box['Cumulative Wins'] = df_lost_box['Win'].cumsum()



In [None]:
# Number of rows (games) in the Lost Box subset
df_lost_box.shape[0]

In [None]:
# Number the Lost Box games 1..n in row order.
# The built-in range is used instead of np.arange: no visible cell imports numpy,
# so the np.arange call raised NameError on a fresh kernel.
df_lost_box['Games Played'] = range(1, len(df_lost_box) + 1)

# Running win ratio within the Lost Box games = wins so far / games so far
df_lost_box['Win Ratio'] = df_lost_box['Cumulative Wins'] / df_lost_box['Games Played']

# Preview the first few rows
df_lost_box.head()

In [None]:
# Derive the variant name: drop the literal text 'Lost Box' from the deck name
# and trim surrounding whitespace
variation = df_lost_box['Deck'].str.replace('Lost Box', '', regex=False).str.strip()

# A deck named just 'Lost Box' leaves an empty string; label it 'Lost Box'
variation = variation.replace('', 'Lost Box')

# Strip parentheses. regex=False forces '(' and ')' to be matched as literal
# characters; the default for `regex` differs between pandas versions, and as
# regular expressions these patterns are invalid.
variation = variation.str.replace('(', '', regex=False).str.replace(')', '', regex=False)

df_lost_box['Deck Variation'] = variation


In [None]:
# One Axes holding both layers: points coloured by deck variation,
# plus a gray line tracing the running win ratio
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(
    data=df_lost_box,
    x='Games Played',
    y='Win Ratio',
    hue='Deck Variation',
    marker='o',
    palette='tab10',
    ax=ax,
)
sns.lineplot(data=df_lost_box, x='Games Played', y='Win Ratio', color='gray', ax=ax)

# Title and axis labels
ax.set_title('Win Ratio Over Time (Lost Box Decks)')
ax.set_xlabel('Number of Games Played')
ax.set_ylabel('Win Ratio')

# y-axis runs from 0 to 1.1, leaving headroom above a ratio of 1
ax.set_ylim(0, 1.1)

# Anchor the legend at the lower-left corner of the axes
ax.legend(bbox_to_anchor=(0, 0), loc='lower left', title='Deck Variation')

# Render the figure
plt.show()

In [None]:
# Group the 0/1 'Win' flag by opponent deck once and reuse the grouping
wins_by_opponent = df_lost_box.groupby('Opponent’s deck')['Win']
win_sum = wins_by_opponent.sum()        # wins against each opponent deck
game_count = wins_by_opponent.count()   # games against each opponent deck

# Win ratio per opponent deck = wins / games
win_ratio = win_sum.div(game_count)

# Highest win ratio first
win_ratio_sorted = win_ratio.sort_values(ascending=False)

# Show the sorted ratios
win_ratio_sorted

In [None]:
# The ten opponent decks faced most often (value_counts sorts by frequency, descending)
opponent_counts = df_lost_box['Opponent’s deck'].value_counts()
top_decks = opponent_counts.index[:10]

# Restrict to games against one of those decks
faced_top_deck = df_lost_box['Opponent’s deck'].isin(top_decks)
df_top_decks = df_lost_box[faced_top_deck]

# Mean of the 0/1 'Win' flag per opponent deck, highest first
win_ratios = (
    df_top_decks.groupby('Opponent’s deck')['Win']
    .mean()
    .sort_values(ascending=False)
)

# Show the win ratios
win_ratios

In [None]:
# Horizontal bars: one per opponent deck, bar length = win ratio
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x=win_ratios.values, y=win_ratios.index, palette='viridis', ax=ax)

# Title and axis labels
ax.set_title('Win Ratios Against Top 10 Opponent Decks')
ax.set_xlabel('Win Ratio')
ax.set_ylabel('Opponent Deck')

# Render the figure
plt.show()

In [None]:
# Relabel 'Regieleki' as 'Miraidon ex' so both are counted as one opponent deck.
# Note: this overwrites the column on df_lost_box itself.
df_lost_box['Opponent’s deck'] = df_lost_box['Opponent’s deck'].replace({'Regieleki': 'Miraidon ex'})

# Opponent decks to report on
decks = [
    'Miraidon ex', 'Chien-Pao', 'Gardevoir', 'Random',
    'Giratina', 'Lost Box', 'Mew', 'Lugia',
]

# Restrict to games against one of the listed decks
faced_listed_deck = df_lost_box['Opponent’s deck'].isin(decks)
df_filtered = df_lost_box[faced_listed_deck]

# Mean of the 0/1 'Win' flag per opponent deck, highest first
win_ratios = (
    df_filtered.groupby('Opponent’s deck')['Win']
    .mean()
    .sort_values(ascending=False)
)

# Games per opponent deck, reordered to line up with win_ratios
match_counts = df_filtered['Opponent’s deck'].value_counts().loc[win_ratios.index]

# Show both series
win_ratios, match_counts

In [None]:
# Horizontal bar chart: one bar per opponent deck, bar length = win ratio
plt.figure(figsize=(10, 6))
barplot = sns.barplot(x=win_ratios.values, y=win_ratios.index, palette='viridis')

# Write the number of matches at the end of each bar.
# win_ratios is labelled by deck name, so win_ratios[i] with an integer i relied on
# pandas' deprecated positional fallback; walk the values by position instead.
# match_counts was reordered to win_ratios' index, so the two line up row for row.
for i, (ratio, count) in enumerate(zip(win_ratios.values, match_counts.values)):
    barplot.text(ratio, i, f' (n={count})', va='center')

# Add title and labels
plt.title('Win Ratios Against Specified Opponent Decks')
plt.xlabel('Win Ratio')
plt.ylabel('Opponent Deck')

# Show the plot
plt.show()

In [None]:
# Mean of the 0/1 'Win' flag per deck variation, highest first
wins_by_variation = df_lost_box.groupby('Deck Variation')['Win']
win_ratios = wins_by_variation.mean().sort_values(ascending=False)

# Games per deck variation, reordered to line up with win_ratios
match_counts = df_lost_box['Deck Variation'].value_counts().loc[win_ratios.index]

# Show both series
win_ratios, match_counts

In [None]:
# Horizontal bar chart: one bar per deck variation, bar length = win ratio
plt.figure(figsize=(10, 6))
barplot = sns.barplot(x=win_ratios.values, y=win_ratios.index, palette='viridis')

# Write the number of matches at the end of each bar.
# win_ratios is labelled by variation name, so win_ratios[i] with an integer i relied on
# pandas' deprecated positional fallback; walk the values by position instead.
# match_counts was reordered to win_ratios' index, so the two line up row for row.
for i, (ratio, count) in enumerate(zip(win_ratios.values, match_counts.values)):
    barplot.text(ratio, i, f' (n={count})', va='center')

# Add title and labels
plt.title('Win Ratios for Each Lost Box Deck Variation')
plt.xlabel('Win Ratio')
plt.ylabel('Deck Variation')

# Show the plot
plt.show()

In [None]:
# Normalise the 'Deck Variation' labels: empty string -> 'Lost Box', then drop parentheses.
# regex=False forces '(' and ')' to be matched as literal characters; the default for
# `regex` differs between pandas versions, and as regular expressions these patterns are invalid.
deck_variation = (
    df_lost_box['Deck Variation']
    .replace('', 'Lost Box')
    .str.replace('(', '', regex=False)
    .str.replace(')', '', regex=False)
)
df_lost_box['Deck Variation'] = deck_variation

# Mean of the 0/1 'Win' flag per deck variation
win_ratios = df_lost_box.groupby('Deck Variation')['Win'].mean()

# Number of matches per deck variation
match_counts = df_lost_box['Deck Variation'].value_counts()

# Sort the win ratios in descending order and reorder the counts to match
win_ratios = win_ratios.sort_values(ascending=False)
match_counts = match_counts.loc[win_ratios.index]

# Display the win ratios and match counts
win_ratios, match_counts

In [None]:
# Horizontal bar chart: one bar per deck variation, bar length = win ratio
plt.figure(figsize=(10, 6))
barplot = sns.barplot(x=win_ratios.values, y=win_ratios.index, palette='viridis')

# Write the number of matches at the end of each bar.
# win_ratios is labelled by variation name, so win_ratios[i] with an integer i relied on
# pandas' deprecated positional fallback; walk the values by position instead.
# match_counts was reordered to win_ratios' index, so the two line up row for row.
for i, (ratio, count) in enumerate(zip(win_ratios.values, match_counts.values)):
    barplot.text(ratio, i, f' (n={count})', va='center')

# Add title and labels
plt.title('Win Ratios for Each Lost Box Deck Variation')
plt.xlabel('Win Ratio')
plt.ylabel('Deck Variation')

# Show the plot
plt.show()

In [None]:
# Overall win ratio across the Lost Box games: the mean of the 'Win' column
lost_box_results = df_lost_box['Win']
overall_win_ratio = lost_box_results.mean()

# Show the ratio
overall_win_ratio

In [None]:
# Keep only the Lost Box games where the 'Win' column is 0
df_lost_box_losses = df_lost_box[df_lost_box['Win'] == 0]

# Histogram of the 'Prizes left' column for those games.
# discrete=True gives every whole number its own bar centred on the value. With the
# previous bins=range(7) the last bin was the closed interval [5, 6], so games with
# 5 and 6 prizes left were counted together in one bar.
# binrange keeps the same 0..6 span the original bins covered
# (assumes 'Prizes left' holds whole numbers — TODO confirm against the data).
plt.figure(figsize=(10, 6))
sns.histplot(df_lost_box_losses['Prizes left'], discrete=True, binrange=(0, 6), kde=False, color='skyblue')

# Add title and labels
plt.title('Histogram of Prizes Left at the End of Lost Matches')
plt.xlabel('Prizes Left')
plt.ylabel('Frequency')

# Show the plot
plt.show()

In [None]:
# 'Prizes left' for the rows of df_lost_box_losses where the value is above 0.
# A single .loc selection replaces the chained df[mask]['col'] lookup.
nonzero_prizes_left = df_lost_box_losses.loc[df_lost_box_losses['Prizes left'] > 0, 'Prizes left']

# discrete=True gives every whole number its own bar centred on the value. With the
# previous bins=range(1, 7) the last bin was the closed interval [5, 6], so games with
# 5 and 6 prizes left were counted together in one bar.
# binrange keeps the same 1..6 span the original bins covered
# (assumes 'Prizes left' holds whole numbers — TODO confirm against the data).
plt.figure(figsize=(10, 6))
sns.histplot(nonzero_prizes_left, discrete=True, binrange=(1, 6), kde=False, color='skyblue')

# Add title and labels
plt.title('Histogram of Prizes Left at the End of Lost Matches')
plt.xlabel('Prizes Left')
plt.ylabel('Frequency')

# Show the plot
plt.show()

In [None]:
# Summary statistics of 'Prizes left' over the rows of df_lost_box_losses
prizes_after_loss = df_lost_box_losses['Prizes left']
mean_prizes_left = prizes_after_loss.mean()
std_prizes_left = prizes_after_loss.std()

# Show (mean, standard deviation)
mean_prizes_left, std_prizes_left

In [None]:
# Keep only the Lost Box games where the 'Win' column is 1
df_lost_box_wins = df_lost_box[df_lost_box['Win'] == 1]

# Histogram of the 'Opponent’s prizes' column for those games.
# discrete=True gives every whole number its own bar centred on the value. With the
# previous bins=range(1, 7) the last bin was the closed interval [5, 6], so games with
# 5 and 6 opponent prizes were counted together in one bar.
# binrange keeps the same 1..6 span the original bins covered
# (assumes the column holds whole numbers — TODO confirm against the data).
plt.figure(figsize=(10, 6))
sns.histplot(df_lost_box_wins['Opponent’s prizes'], discrete=True, binrange=(1, 6), kde=False, color='skyblue')

# Add title and labels
plt.title('Histogram of Opponent’s Prizes Left at the End of Won Matches')
plt.xlabel('Opponent’s Prizes Left')
plt.ylabel('Frequency')

# Show the plot
plt.show()

# Mean and standard deviation of 'Opponent’s prizes' over the same rows
mean_opponent_prizes_left = df_lost_box_wins['Opponent’s prizes'].mean()
std_opponent_prizes_left = df_lost_box_wins['Opponent’s prizes'].std()

# Display the mean and standard deviation
mean_opponent_prizes_left, std_opponent_prizes_left

In [None]:
# Rows where 'First?' equals 1 (taken to mean the player went first, per the original comment)
went_first = df_lost_box['First?'].eq(1)
df_lost_box_first = df_lost_box[went_first]

# Win ratio over those rows: the mean of the 'Win' column
first_win_ratio = df_lost_box_first['Win'].mean()

# Show the ratio
first_win_ratio

In [None]:
# From the rows in df_lost_box_first, keep those whose opponent deck name contains 'Lost Box'.
# na=False counts a missing opponent deck as "no match"; without it str.contains yields NaN
# for such rows and boolean indexing with that mask raises.
faced_lost_box = df_lost_box_first['Opponent’s deck'].str.contains('Lost Box', na=False)
df_lost_box_first_vs_lost_box = df_lost_box_first[faced_lost_box]

# Win ratio over those rows: the mean of the 'Win' column
first_vs_lost_box_win_ratio = df_lost_box_first_vs_lost_box['Win'].mean()

# Display the win ratio
first_vs_lost_box_win_ratio

In [None]:
# Inspect the rows at positions 7 and 8 of the Lost Box subset
df_lost_box.iloc[slice(7, 9)]

In [None]:
# Overall Lost Box win ratio again: the mean of the 'Win' column
overall_win_ratio = df_lost_box.loc[:, 'Win'].mean()
overall_win_ratio

In [None]:
# Mean of the 'Win' column over the Lost Box games, i.e. the overall win ratio
overall_win_ratio = df_lost_box.loc[:, 'Win'].mean()
overall_win_ratio