In [None]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from scipy.stats import jarque_bera, normaltest, ttest_1samp
from statsmodels.stats.stattools import durbin_watson

# Load your dataset from a CSV file
csv_file_path = "/content/drive/MyDrive/Dataset/Final Dataset Fifa/final-df.csv"

# Load the CSV file into a DataFrame
df = pd.read_csv(csv_file_path)

# Print the column names to verify their existence
print(df.columns)

Index(['Age', 'Potential', 'Value', 'ShortPassing', 'Balance', 'Strength',
       'Interceptions', 'SlidingTackle', 'BMI', 'Work Rate_NUM'],
      dtype='object')


In [None]:
# List of numerical columns to perform tests on
numerical_columns = ['Age', 'Potential', 'ShortPassing', 'Balance', 'Strength',
                     'Interceptions', 'SlidingTackle', 'Work Rate_NUM', 'BMI', 'Value']

# Iterate through each numerical column
for col in numerical_columns:
    data = df[col]

    # JB test
    jb_stat, jb_p_value = jarque_bera(data)
    print(f'Jarque-Bera Test for {col}:')
    print(f'  JB Test Statistic: {jb_stat}')
    print(f'  JB P-Value: {jb_p_value}')

    # P-value test
    p_value_stat, p_value = normaltest(data)
    print(f'P-Value Test for {col}:')
    print(f'  P-Value Test Statistic: {p_value_stat}')
    print(f'  P-Value: {p_value}')

    # T-test (one-sample T-test against a known population mean)
    population_mean = 0  # Replace with your known population mean
    t_stat, t_p_value = ttest_1samp(data, population_mean)
    print(f'T-Test for {col}:')
    print(f'  T-Test Statistic: {t_stat}')
    print(f'  T-Test P-Value: {t_p_value}')

    # Standard error
    std_error = np.std(data, ddof=1) / np.sqrt(len(data))
    print(f'Standard Error for {col}: {std_error}')

    # DW test
    # Assuming your data is time series data, you can perform the DW test
    dw_stat = durbin_watson(data)
    print(f'Durbin-Watson Test for {col}:')
    print(f'  Durbin-Watson Statistic: {dw_stat}')


Jarque-Bera Test for Age:
  JB Test Statistic: 134.24960275642118
  JB P-Value: 7.048053022140634e-30
P-Value Test for Age:
  P-Value Test Statistic: 347.9017054413481
  P-Value: 2.845145893335285e-76
T-Test for Age:
  T-Test Statistic: 380.8448489953365
  T-Test P-Value: 0.0
Standard Error for Age: 0.06969965201601588
Durbin-Watson Test for Age:
  Durbin-Watson Statistic: 0.05148042132528354
Jarque-Bera Test for Potential:
  JB Test Statistic: 60.85910929485948
  JB P-Value: 6.089930408537049e-14
P-Value Test for Potential:
  P-Value Test Statistic: 58.64583906511202
  P-Value: 1.8416956135310162e-13
T-Test for Potential:
  T-Test Statistic: 873.3876118041011
  T-Test P-Value: 0.0
Standard Error for Potential: 0.08285023006187517
Durbin-Watson Test for Potential:
  Durbin-Watson Statistic: 0.00916305261752554
Jarque-Bera Test for ShortPassing:
  JB Test Statistic: 451.716417054733
  JB P-Value: 8.147537049328821e-99
P-Value Test for ShortPassing:
  P-Value Test Statistic: 304.39017723

**# Column for DF**
data = df['Age', 'Potential', 'ShortPassing', 'Balance', 'Strength',
       'Interceptions', 'SlidingTackle', 'Work Rate_NUM', 'BMI_floor', 'Value_of_log']

In [2]:
import pandas as pd
import numpy as np
from statsmodels.stats.outliers_influence import variance_inflation_factor

# Load your dataset (replace 'your_dataset.csv' with the actual file path)
data = pd.read_csv('final-df.csv')

# Extract the predictor variables (independent variables) for which you want to check multicollinearity
# Replace 'X1', 'X2', 'X3', etc. with the actual column names of your predictor variables
X = data[['Age', 'Potential', 'ShortPassing', 'Balance', 'Strength',
          'Interceptions', 'SlidingTackle', 'Work Rate_NUM', 'BMI']]

# Add a constant (intercept) term to the predictor variables
X['intercept'] = 1

# Calculate VIF values
vif = pd.DataFrame()
vif["Variable"] = X.columns
vif["VIF"] = [variance_inflation_factor(X.values, i) for i in range(X.shape[1])]

# Display the VIF values
print(vif)


        Variable         VIF
0            Age    3.183914
1      Potential    4.501473
2   ShortPassing    2.246277
3        Balance    2.015055
4       Strength    2.229485
5  Interceptions    4.814536
6  SlidingTackle    4.032895
7  Work Rate_NUM    1.116806
8            BMI    1.166046
9      intercept  761.335138
