In [3]:
# Install required libraries if not already installed
!pip install pandas matplotlib seaborn openpyxl

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

def analyze_arcade_data(file_path):
    """
    Print an overview of the arcade data and save pins-vs-speed/angle plots.

    The workbook is read with ``pd.read_excel``; its column headers are
    stripped and lowercased so that lookups such as ``'speed 1'`` work
    regardless of how the headers were typed. For each throw (1 and 2) a
    two-panel scatter figure is written to the current working directory:

    * ``speed_angle_1_vs_pins_1.png``
    * ``speed_angle_2_vs_pins_2.png``

    The figures are closed after saving, so nothing is displayed inline.

    Parameters:
    file_path (str): Path to the Excel file containing arcade game data

    Returns:
    None

    Raises:
    KeyError: If any of the ``speed N``, ``angle N`` or ``pins N`` columns
        (N = 1, 2) is missing from the workbook.
    """
    # Read the Excel file
    df = pd.read_excel(file_path)

    # Normalise headers: tolerate stray whitespace and mixed case
    df.columns = df.columns.astype(str).str.strip().str.lower()

    # 1. Basic Data Exploration
    print("Dataset Overview:")
    # DataFrame.info() writes its report itself and returns None, so it is
    # called directly; wrapping it in print() emits a stray "None" line.
    df.info()

    # Marker colours per throw: (speed panel, angle panel)
    panel_colors = {1: ('blue', 'red'), 2: ('green', 'purple')}
    fields = ('speed', 'angle')

    # Fail early with one message that lists every missing column
    required = [
        f'{name} {throw}'
        for throw in panel_colors
        for name in fields + ('pins',)
    ]
    missing = [column for column in required if column not in df.columns]
    if missing:
        raise KeyError(f"Missing required column(s): {missing}")

    # 2./3. One figure per throw: Speed N and Angle N vs Pins N
    for throw, colors in panel_colors.items():
        pins_column = f'pins {throw}'
        pins_label = f'Pins {throw}'

        fig, axes = plt.subplots(1, 2, figsize=(15, 6))
        for ax, field, color in zip(axes, fields, colors):
            x_column = f'{field} {throw}'
            x_label = f'{field.capitalize()} {throw}'

            ax.scatter(df[x_column], df[pins_column], alpha=0.7, color=color)
            ax.set_title(f'{x_label} vs {pins_label}')
            ax.set_xlabel(x_label)
            ax.set_ylabel(pins_label)

        fig.tight_layout()
        fig.savefig(f'speed_angle_{throw}_vs_pins_{throw}.png')
        # Release the figure explicitly so repeated runs do not accumulate
        # open figures in the kernel.
        plt.close(fig)

# Specify the path to your Excel file.
# This is a relative path, so it is resolved against the kernel's current
# working directory.
file_path = 'arcade data.xlsx'  # Modify this to match your file location

# Run the analysis: prints the dataset overview and writes the two
# scatter-plot PNG files next to the notebook's working directory.
analyze_arcade_data(file_path)

Dataset Overview:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40 entries, 0 to 39
Data columns (total 9 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   run         10 non-null     float64
 1   player      40 non-null     int64  
 2   speed 1     40 non-null     object 
 3   speed 2     40 non-null     object 
 4   angle 1     40 non-null     object 
 5   angle 2     40 non-null     object 
 6   pins 1      40 non-null     int64  
 7   pins 2      40 non-null     int64  
 8   total pins  40 non-null     int64  
dtypes: float64(1), int64(4), object(4)
memory usage: 2.9+ KB
None


In [4]:
def analyze_arcade_data(file_path):
  """
  Summarizes pins knocked down per speed and angle category for both throws.

  The workbook is read with ``pd.read_excel``. Column headers and the
  speed/angle labels are stripped and lowercased before matching, so
  entries such as ``'Fast'`` or ``'fast '`` are counted with ``'fast'``
  instead of being silently dropped.

  Args:
    file_path: Path to the Excel file containing arcade game data.

  Returns:
    A dictionary whose keys have the form ``'<factor><throw>_<label>'``
    (for example ``'speed1_fast'`` or ``'angle2_left'``) and whose values
    are the pandas Series returned by ``Series.describe()`` on the
    ``pins <throw>`` values of the matching rows. Keys are inserted in the
    order speed 1, speed 2, angle 1, angle 2. A label with no matching rows
    still gets an entry (its ``count`` is 0).

  Raises:
    KeyError: If any ``speed N``, ``angle N`` or ``pins N`` column
      (N = 1, 2) is missing from the workbook.
  """

  df = pd.read_excel(file_path)

  # Normalise headers: tolerate stray whitespace and mixed case.
  df.columns = df.columns.astype(str).str.strip().str.lower()

  # Labels reported for each factor; the same labels are used for both throws.
  categories = {
      'speed': ('fast', 'slow', 'medium'),
      'angle': ('straight', 'right', 'left'),
  }
  throws = (1, 2)

  # Fail early with one message that lists every missing column.
  required = [f'{name} {throw}'
              for throw in throws
              for name in tuple(categories) + ('pins',)]
  missing = [column for column in required if column not in df.columns]
  if missing:
    raise KeyError(f"Missing required column(s): {missing}")

  summary_stats = {}
  for factor, labels in categories.items():
    for throw in throws:
      # Compare on a cleaned copy of the labels; the data frame itself is
      # left untouched.
      observed = df[f'{factor} {throw}'].astype(str).str.strip().str.lower()
      pins = df[f'pins {throw}']
      for label in labels:
        summary_stats[f'{factor}{throw}_{label}'] = pins[observed == label].describe()

  return summary_stats

# Specify the path to your Excel file.
# This is a relative path, so it is resolved against the kernel's current
# working directory.
file_path = 'arcade data.xlsx'

# Run the analysis; the result is a dict keyed like 'speed1_fast'.
summary_stats = analyze_arcade_data(file_path)

# Print or further analyze the results.
# Despite the name, each `stats_df` value is a pandas Series: it is the
# result of calling describe() on a single pins column.
for key, stats_df in summary_stats.items():
  print(f"Summary Statistics for {key}:\n")
  print(stats_df)
  # print() appends its own newline, so this leaves two blank lines
  # between consecutive summaries.
  print("\n")

Summary Statistics for speed1_fast:

count    28.000000
mean      6.214286
std       2.393775
min       0.000000
25%       5.750000
50%       7.000000
75%       8.000000
max       9.000000
Name: pins 1, dtype: float64


Summary Statistics for speed1_slow:

count    3.0
mean     3.0
std      1.0
min      2.0
25%      2.5
50%      3.0
75%      3.5
max      4.0
Name: pins 1, dtype: float64


Summary Statistics for speed1_medium:

count    9.000000
mean     5.666667
std      2.061553
min      3.000000
25%      4.000000
50%      6.000000
75%      7.000000
max      9.000000
Name: pins 1, dtype: float64


Summary Statistics for speed2_fast:

count    25.000000
mean      1.520000
std       1.981582
min       0.000000
25%       0.000000
50%       1.000000
75%       3.000000
max       8.000000
Name: pins 2, dtype: float64


Summary Statistics for speed2_slow:

count    6.00000
mean     2.00000
std      1.67332
min      0.00000
25%      1.00000
50%      1.50000
75%      3.50000
max      4.00000
N

o Plot trends, such as how the throw angle impacts the number of pins knocked down.
o Identify patterns, anomalies, or relationships in the data.

Throwing the ball at a left angle knocks over the fewest pins. On the first throw, throwing straight and throwing right had the highest means, at 6.1 pins each, whereas throwing left had a mean of 4.7 pins. On the second throw, the means were 1.7 pins for throwing straight, 1.7 for throwing right, and 1.3 for throwing left. Because throwing straight and throwing right have about the same mean on both throws, while throwing left is lower on both, throwing the ball to the left consistently knocked over fewer pins than throwing it to the right or straight.

On the first throw, throwing the ball fast knocks down a mean of 6.2 pins, compared with 3.0 pins for slow throws and 5.7 pins for medium-speed throws. In the second throw, the data follows a similar trend with a mean of 1.5 pins knocked down for throwing the ball fast, 2.0 pins for throwing the ball slowly, and 1.4 pins for throwing at a medium speed. However, the standard deviation for the slow second throw is 1.7, so the median of 1.5 pins should be a better measure. Because there is so much more data for fast throws than for the other speeds, and because the first fast throw knocks over noticeably more pins than the medium and slow throws, throwing the ball fast knocks over the most pins, followed by throwing at a medium speed; throwing the ball slowly knocks over the fewest.