# Packages

In [2]:
import warnings
warnings.filterwarnings("ignore")
import os

import pandas as pd
import numpy as np
import polars as pl
import scipy.stats as stats
import seaborn as sns
import statsmodels.formula.api as smf
import statsmodels.api as sm
import matplotlib.pyplot as plt
import math

from statsmodels.tsa.deterministic import DeterministicProcess, CalendarFourier
from statsmodels.graphics.tsaplots import plot_pacf

from sklearn.model_selection import GroupKFold, KFold, train_test_split, cross_val_score, TimeSeriesSplit
from sklearn.preprocessing import RobustScaler, LabelEncoder
from sklearn.metrics import mean_squared_error, r2_score, make_scorer, accuracy_score, median_absolute_error, classification_report, mean_absolute_error, roc_auc_score, roc_curve
from sklearn.pipeline import make_pipeline
from sklearn.decomposition import PCA
from imblearn.over_sampling import RandomOverSampler
from scipy.optimize import minimize

import xgboost as xgb
import lightgbm as lgb
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, StackingRegressor, RandomForestClassifier
from sklearn.linear_model import Ridge
from sklearn.svm import LinearSVC
from sklearn.svm import SVR
from sklearn.linear_model import LogisticRegression

import optuna

import requests
import re
import tabula
import pdfplumber

# Notebook-wide pandas display preferences: raise the column/row truncation
# limits to 500 and print every float with four decimal places.
pd.options.display.max_columns = 500
pd.options.display.max_rows = 500
pd.options.display.float_format = lambda value: "%.4f" % value
# pd.options.plotting.backend = "plotly"

# Plot styling: matplotlib's 'ggplot' style sheet first, then seaborn's
# 'darkgrid' style (same order as before, since the later call wins on overlap).
plt.style.use('ggplot')
sns.set_style('darkgrid')

# Reading Data

In [None]:
# Load the ladies' FS score sheet (owg10_Ladies_FS_Scores_SOV.csv) into `df`.
# NOTE(review): dir_name is an absolute path on the original author's machine;
# anyone else running this notebook must point it at their own copy of the data.
dir_name = "/Users/kylewang/Documents/yale/SDS4250_StatCaseStudies/OlympicFigureSkating/OWG10_ScoresCSV_SOV"
file_name = "owg10_Ladies_FS_Scores_SOV.csv"
# Build the path with os.path.join instead of hand-concatenating "/" so that
# separator handling is left to the standard library.
full_path = os.path.join(dir_name, file_name)

df = pd.read_csv(full_path)
# Trailing expression: the notebook echoes the first five rows as cell output.
df.head()

Unnamed: 0,rank,name,country,starting_number,total_segment_score,total_element_score,total_component_score,deductions,type,category,base_value,highlight,goe,factor,j1,j2,j3,j4,j5,j6,j7,j8,j9,scores_of_panel
0,1,KIM Yu-Na,KOR,21,150.06,78.3,71.76,,ee,3Lz+3T,10.0,0.0,2.0,,2.0,2.0,2.0,3.0,2.0,2.0,0.0,2.0,2.0,12.0
1,1,KIM Yu-Na,KOR,21,150.06,78.3,71.76,,ee,3F,5.5,0.0,1.8,,2.0,2.0,2.0,2.0,2.0,2.0,1.0,1.0,2.0,7.3
2,1,KIM Yu-Na,KOR,21,150.06,78.3,71.76,,ee,2A+2T+2Lo,6.3,0.0,1.4,,1.0,1.0,1.0,2.0,1.0,2.0,2.0,2.0,1.0,7.7
3,1,KIM Yu-Na,KOR,21,150.06,78.3,71.76,,ee,FCoSp4,3.0,0.0,0.8,,0.5,0.5,0.5,1.0,0.5,0.5,1.0,1.0,1.0,3.8
4,1,KIM Yu-Na,KOR,21,150.06,78.3,71.76,,ee,SpSq4,3.4,0.0,2.0,,2.0,2.0,2.0,2.0,2.0,2.0,3.0,2.0,2.0,5.4


In [4]:
# Echo the frame's dimensions as a (rows, columns) tuple.
df.shape

(432, 24)

In [5]:
# Render the DataFrame as this cell's output.
# NOTE(review): `display` is not imported in the visible code; presumably the
# IPython/Jupyter built-in, so this line only works inside a notebook session.
display(df)

Unnamed: 0,rank,name,country,starting_number,total_segment_score,total_element_score,total_component_score,deductions,type,category,base_value,highlight,goe,factor,j1,j2,j3,j4,j5,j6,j7,j8,j9,scores_of_panel
0,1,KIM Yu-Na,KOR,21,150.06,78.3,71.76,,ee,3Lz+3T,10.0,0.0,2.0,,2.0,2.0,2.0,3.0,2.0,2.0,0.0,2.0,2.0,12.0
1,1,KIM Yu-Na,KOR,21,150.06,78.3,71.76,,ee,3F,5.5,0.0,1.8,,2.0,2.0,2.0,2.0,2.0,2.0,1.0,1.0,2.0,7.3
2,1,KIM Yu-Na,KOR,21,150.06,78.3,71.76,,ee,2A+2T+2Lo,6.3,0.0,1.4,,1.0,1.0,1.0,2.0,1.0,2.0,2.0,2.0,1.0,7.7
3,1,KIM Yu-Na,KOR,21,150.06,78.3,71.76,,ee,FCoSp4,3.0,0.0,0.8,,0.5,0.5,0.5,1.0,0.5,0.5,1.0,1.0,1.0,3.8
4,1,KIM Yu-Na,KOR,21,150.06,78.3,71.76,,ee,SpSq4,3.4,0.0,2.0,,2.0,2.0,2.0,2.0,2.0,2.0,3.0,2.0,2.0,5.4
5,1,KIM Yu-Na,KOR,21,150.06,78.3,71.76,,ee,2A+3T,7.5,0.0,2.0,,2.0,2.0,2.0,3.0,2.0,2.0,2.0,2.0,2.0,9.5
6,1,KIM Yu-Na,KOR,21,150.06,78.3,71.76,,ee,3S,4.95,0.0,1.4,,1.0,0.0,1.0,2.0,1.0,1.0,1.0,2.0,2.0,6.35
7,1,KIM Yu-Na,KOR,21,150.06,78.3,71.76,,ee,3Lz,6.6,0.0,2.0,,2.0,3.0,2.0,3.0,2.0,2.0,2.0,1.0,2.0,8.6
8,1,KIM Yu-Na,KOR,21,150.06,78.3,71.76,,ee,SlSt3,3.3,0.0,1.0,,1.0,1.0,1.0,1.0,1.0,1.0,1.5,1.0,1.0,4.3
9,1,KIM Yu-Na,KOR,21,150.06,78.3,71.76,,ee,2A,3.85,0.0,1.4,,1.0,1.0,1.0,2.0,2.0,1.0,2.0,1.0,1.0,5.25
