# Statistical Analysis

This notebook conducts statistical analysis on data loaded from Google Sheets.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from google.oauth2 import service_account
import gspread
from gspread_dataframe import get_as_dataframe

## Load Data from Google Sheets

In [2]:
def load_google_sheet_data(sheet_url, worksheet_name=None, credentials_file=None):
    """
    Load one worksheet of a Google Sheet into a pandas DataFrame.

    Args:
        sheet_url: Spreadsheet URL (any string containing 'docs.google.com')
            or a bare spreadsheet key/ID. Surrounding whitespace is ignored.
        worksheet_name: Title of the worksheet to read (optional; the
            worksheet at index 0 is used when omitted).
        credentials_file: Path to a service account JSON file (optional;
            gspread's OAuth flow is used when omitted).

    Returns:
        pandas.DataFrame: The loaded data with fully-empty rows and columns
        removed, or None if the sheet could not be loaded. Failures are
        printed rather than raised, so callers must check for None.
    """
    # Validate before authenticating so a bad argument never triggers an
    # OAuth prompt and produces a message that names the actual problem.
    if not isinstance(sheet_url, str) or not sheet_url.strip():
        print("Error loading data: sheet_url must be a non-empty URL or spreadsheet ID")
        return None
    sheet_ref = sheet_url.strip()

    try:
        if credentials_file:
            # Use service account credentials
            gc = gspread.service_account(filename=credentials_file)
        else:
            # Use default authentication (will prompt for OAuth)
            gc = gspread.oauth()

        # Open the spreadsheet: full URLs and bare keys use different gspread calls
        if 'docs.google.com' in sheet_ref:
            sheet = gc.open_by_url(sheet_ref)
        else:
            sheet = gc.open_by_key(sheet_ref)

        # Get the worksheet
        if worksheet_name:
            worksheet = sheet.worksheet(worksheet_name)
        else:
            worksheet = sheet.get_worksheet(0)  # First worksheet

        # Convert to DataFrame
        df = get_as_dataframe(worksheet)

        # Remove empty rows and columns
        return df.dropna(how='all').dropna(axis=1, how='all')

    except Exception as e:
        # Deliberately broad: the notebook's load cell relies on a None result
        # instead of an exception. Include the exception type because some
        # exceptions carry an empty message, which would otherwise print nothing.
        message = str(e)
        detail = f"{type(e).__name__}: {message}" if message else type(e).__name__
        print(f"Error loading data: {detail}")
        return None

In [3]:
# Spreadsheet to analyse -- point this at your own Google Sheet
SHEET_URL = "https://docs.google.com/spreadsheets/d/1EX83SL2E6Cg_Y1y88e8svOvqS6SjZd11rIKaeX_3ABs/edit?gid=0#gid=0"

# Read the first worksheet; the loader returns None when the sheet cannot be read
df = load_google_sheet_data(SHEET_URL)

if df is None:
    print("Failed to load data. Please check your sheet URL and permissions.")
else:
    # Sanity check of what came back: dimensions, a preview, and column dtypes
    print(
        "Data loaded successfully!",
        f"Shape: {df.shape}",
        "\nFirst few rows:",
        sep="\n",
    )
    display(df.head())
    print("\nData types:", df.dtypes, sep="\n")

Error loading data: [Errno 2] No such file or directory: '/Users/kylenessen/.config/gspread/credentials.json'
Failed to load data. Please check your sheet URL and permissions.


## Data Exploration and Analysis

In [None]:
# Overview of the loaded frame: dimensions, column names, null counts, summary stats.
# Nothing is printed when `df` was never defined or the load step produced None.
if 'df' in locals() and df is not None:
    print(
        "Dataset Overview:",
        f"Rows: {df.shape[0]}",
        f"Columns: {df.shape[1]}",
        f"\nColumn names: {list(df.columns)}",
        sep="\n",
    )

    # Per-column count of missing cells
    print("\nMissing values:", df.isna().sum(), sep="\n")

    print("\nBasic statistics:")
    display(df.describe())

In [None]:
# Add your statistical analysis here
# Example: correlation analysis, hypothesis testing, etc.