In [316]:
# Checkpoint to begin loading the Showdown data set

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats as sp

pokemon_counts_df = pd.read_csv("./dataset/showdown/showdown.csv")

New columns have been added to the Showdown dataset to reflect whether a team can be considered a Weather team, specifically taking advantage of the Rain or Sandstorm. This is done through determining whether a team of Pokemon includes a Politoed and either a Tentacruel or Thunderus-Therian for Rain teams, while Sand teams need a Tyranitar and Alakazam, Landorus-Therian, or both.

In [317]:
elo_ranges = ["1000-1290", "1291-1390", "1391-1490", "1491-2000"]

pokemon_counts_df = pokemon_counts_df[["Elo", "Weather-Rain", "Weather-Sand"]]
pokemon_counts_df

Unnamed: 0,Elo,Weather-Rain,Weather-Sand
0,1444,0,0
1,1365,0,0
2,1306,1,0
3,1488,1,0
4,1377,0,0
...,...,...,...
4995,1347,0,0
4996,1513,1,0
4997,1316,1,0
4998,1415,1,0


The elo of each player has also been classified into different ranges similar to the Kruskal-Wallis test.

In [318]:
elo_ranges = [1000, 1291, 1391, 1491, 2001]
elo_labels = ["1000-1290", "1291-1390", "1391-1490", "1491-2000"]

pokemon_counts_df["ELO Range"] = pd.cut(
    pokemon_counts_df["Elo"],
    bins=elo_ranges,
    labels=elo_labels,
    right=False
)

After classifying each Player to an Elo Range, their team is determined to be classified as a weather team or not by checking if the team takes advantage of at least one weather between Rain or Sandstorm. This is done since a team may take advantage of both Rain and Sandstorm as the classification done when each log was converted to a csv did not take this into account.

In [319]:
pokemon_counts_df["IsWeather"] = ((pokemon_counts_df["Weather-Rain"] == 1) | (pokemon_counts_df["Weather-Sand"] == 1)).astype(int)
pokemon_counts_df["NotWeather"] = (~pokemon_counts_df["IsWeather"].astype(bool)).astype(int)
pokemon_counts_df

Unnamed: 0,Elo,Weather-Rain,Weather-Sand,ELO Range,IsWeather,NotWeather
0,1444,0,0,1391-1490,0,1
1,1365,0,0,1291-1390,0,1
2,1306,1,0,1291-1390,1,0
3,1488,1,0,1391-1490,1,0
4,1377,0,0,1291-1390,0,1
...,...,...,...,...,...,...
4995,1347,0,0,1291-1390,0,1
4996,1513,1,0,1491-2000,1,0
4997,1316,1,0,1291-1390,1,0
4998,1415,1,0,1391-1490,1,0


After creating the necessary columns, the data is summarized into a new data frame that only has the important information needed to conduct a test of independence using the Chi-Square test.

In [320]:
summary_table = pokemon_counts_df.groupby("ELO Range", observed=True)[["IsWeather", "NotWeather"]].sum().reset_index()
summary_table

Unnamed: 0,ELO Range,IsWeather,NotWeather
0,1000-1290,85,164
1,1291-1390,753,1590
2,1391-1490,604,1067
3,1491-2000,272,465


A second summary table was created in order to clearly show the observed data with the corresponding column and row totals.

In [321]:
display_summary_table = summary_table

# First, compute the sum of each column
weather_sum = summary_table["IsWeather"].sum()
notweather_sum = summary_table["NotWeather"].sum()

# Create new rows as Series objects
TotalTeams = pd.Series(
    {"ELO Range": "TotalTeams", "IsWeather": weather_sum, "NotWeather": notweather_sum, "PlayerCount": weather_sum + notweather_sum}
)

# Append them to the summary_table
display_summary_table = pd.concat([display_summary_table, pd.DataFrame([TotalTeams])], ignore_index=True)


display_summary_table["PlayerCount"] = display_summary_table["IsWeather"] + display_summary_table["NotWeather"]
display_summary_table = display_summary_table.set_index("ELO Range").transpose()
display_summary_table


ELO Range,1000-1290,1291-1390,1391-1490,1491-2000,TotalTeams
IsWeather,85,753,604,272,1714
NotWeather,164,1590,1067,465,3286
PlayerCount,249,2343,1671,737,5000


After finishing the Observed table, the Expected table is created by multiplying the row and column total then dividing by the grand total, done for each row and column.

In [322]:
summary_table.set_index("ELO Range", inplace=True)

summary_table["RowTotal"] = summary_table.sum(axis=1)

col_totals = summary_table[["IsWeather", "NotWeather"]].sum()

grand_total = col_totals.sum()

expected = pd.DataFrame(index=summary_table.index, columns=["IsWeather", "NotWeather"])

for col in ["IsWeather", "NotWeather"]:
    for idx in summary_table.index:
        expected.loc[idx, col] = (summary_table.loc[idx, "RowTotal"] * col_totals[col]) / grand_total

observed = summary_table.drop(columns="RowTotal")

print("Observed Counts:")
print(observed)

print("Expected Counts:")
print(expected)

Observed Counts:
           IsWeather  NotWeather
ELO Range                       
1000-1290         85         164
1291-1390        753        1590
1391-1490        604        1067
1491-2000        272         465
Expected Counts:
          IsWeather NotWeather
ELO Range                     
1000-1290   85.3572   163.6428
1291-1390  803.1804  1539.8196
1391-1490  572.8188  1098.1812
1491-2000  252.6436   484.3564


After determining the Observed and Expected counts for each team in each Elo range, a Chi-Square test is done.

Using the following measures, the test is done.
##### α = 0.05
##### H_0: The usage of weather teams is independent of a player's Elo rating, i.e. players use weather teams at the same rate regardless of their Elo rating.
##### H_a: The usage of weather teams is dependent on a player's Elo rating, i.e. the likelihood of using a weather team changes based on the player's Elo rating.

In [323]:
observed_flat = observed.astype(float).to_numpy().flatten()
expected_flat = expected.astype(float).to_numpy().flatten()

# Run chi-square test
chi2_stat, p_value = sp.chisquare(f_obs=observed_flat, f_exp=expected_flat)

print(f"Chi-square statistic: {chi2_stat:.4f}")
print(f"p-value: {p_value:.4f}")

Chi-square statistic: 9.6119
p-value: 0.2117


Since the p-value gotten after the Chi-Square test is greater than α, we fail to reject the null hypothesis. With this, we can conclude that the usage of weather teams does not depend on a player's Elo rating and the usage of weather teams does not depend on a player's Elo.