In [None]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from scipy.stats import jarque_bera, normaltest, ttest_1samp
from statsmodels.stats.stattools import durbin_watson

# Load your dataset from a CSV file
csv_file_path = "/content/drive/MyDrive/Dataset/Final Dataset Fifa/final-fd.csv"

# Load the CSV file into a DataFrame
df = pd.read_csv(csv_file_path)

# Print the column names to verify their existence
print(df.columns)

Index(['Age', 'Potential', 'Value', 'Finishing', 'HeadingAccuracy',
       'ShortPassing', 'Volleys', 'Dribbling', 'BallControl', 'SprintSpeed',
       'ShotPower', 'Penalties', 'BMI', 'Work Rate_NUM'],
      dtype='object')


In [None]:
# List of numerical columns to perform tests on
numerical_columns = ['Age', 'Potential', 'Value', 'Finishing', 'HeadingAccuracy',
       'ShortPassing', 'Volleys', 'Dribbling', 'BallControl', 'SprintSpeed',
       'ShotPower', 'Penalties', 'BMI', 'Work Rate_NUM']

# Iterate through each numerical column
for col in numerical_columns:
    data = df[col]

    # JB test
    jb_stat, jb_p_value = jarque_bera(data)
    print(f'Jarque-Bera Test for {col}:')
    print(f'  JB Test Statistic: {jb_stat}')
    print(f'  JB P-Value: {jb_p_value}')

    # P-value test
    p_value_stat, p_value = normaltest(data)
    print(f'P-Value Test for {col}:')
    print(f'  P-Value Test Statistic: {p_value_stat}')
    print(f'  P-Value: {p_value}')

    # T-test (one-sample T-test against a known population mean)
    population_mean = 0  # Replace with your known population mean
    t_stat, t_p_value = ttest_1samp(data, population_mean)
    print(f'T-Test for {col}:')
    print(f'  T-Test Statistic: {t_stat}')
    print(f'  T-Test P-Value: {t_p_value}')

    # Standard error
    std_error = np.std(data, ddof=1) / np.sqrt(len(data))
    print(f'Standard Error for {col}: {std_error}')

    # DW test
    # Assuming your data is time series data, you can perform the DW test
    dw_stat = durbin_watson(data)
    print(f'Durbin-Watson Test for {col}:')
    print(f'  Durbin-Watson Statistic: {dw_stat}')


Jarque-Bera Test for Age:
  JB Test Statistic: 67.28712567916433
  JB P-Value: 2.447858486871244e-15
P-Value Test for Age:
  P-Value Test Statistic: 183.04368770364954
  P-Value: 1.7888265888464298e-40
T-Test for Age:
  T-Test Statistic: 242.36693544936253
  T-Test P-Value: 0.0
Standard Error for Age: 0.1089920964385389
Durbin-Watson Test for Age:
  Durbin-Watson Statistic: 0.04601855592549768
Jarque-Bera Test for Potential:
  JB Test Statistic: 46.65675764599115
  JB P-Value: 7.389478764576348e-11
P-Value Test for Potential:
  P-Value Test Statistic: 44.25154323201208
  P-Value: 2.459798199693311e-10
T-Test for Potential:
  T-Test Statistic: 572.6435315970136
  T-Test P-Value: 0.0
Standard Error for Potential: 0.12577128104792412
Durbin-Watson Test for Potential:
  Durbin-Watson Statistic: 0.008641123424041426
Jarque-Bera Test for Value:
  JB Test Statistic: 1213440.1346365614
  JB P-Value: 0.0
P-Value Test for Value:
  P-Value Test Statistic: 2980.883732549376
  P-Value: 0.0
T-Test f

In [1]:
import pandas as pd
import numpy as np
from statsmodels.stats.outliers_influence import variance_inflation_factor

# Load your dataset (replace 'your_dataset.csv' with the actual file path)
data = pd.read_csv('final-fd.csv')

# Extract the predictor variables (independent variables) for which you want to check multicollinearity
# Replace 'X1', 'X2', 'X3', etc. with the actual column names of your predictor variables
X = data[['Age', 'Potential', 'Value', 'Finishing', 'HeadingAccuracy',
       'ShortPassing', 'Volleys', 'Dribbling', 'BallControl', 'SprintSpeed',
       'ShotPower', 'Penalties', 'BMI', 'Work Rate_NUM']]

# Add a constant (intercept) term to the predictor variables
X['intercept'] = 1

# Calculate VIF values
vif = pd.DataFrame()
vif["Variable"] = X.columns
vif["VIF"] = [variance_inflation_factor(X.values, i) for i in range(X.shape[1])]

# Display the VIF values
print(vif)

           Variable         VIF
0               Age    4.134639
1         Potential    5.159410
2             Value    1.648926
3         Finishing    5.680155
4   HeadingAccuracy    2.475547
5      ShortPassing    3.655907
6           Volleys    3.515040
7         Dribbling    5.170225
8       BallControl    7.228989
9       SprintSpeed    1.719306
10        ShotPower    3.295521
11        Penalties    1.930449
12              BMI    1.132264
13    Work Rate_NUM    1.082589
14        intercept  726.429708
