In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from statsmodels.graphics.gofplots import qqplot
from statsmodels.stats.outliers_influence import variance_inflation_factor
from sklearn.linear_model import HuberRegressor
from sklearn.linear_model import LinearRegression
from sklearn.decomposition import PCA

# Path of the CSV file that every later cell analyses through `df`.
data_url = "/content/dataset-7dd735871fec70d4a93837a26503470804b7d7e4.csv"

# Read the whole file into a DataFrame using pandas' default parsing options.
df = pd.read_csv(filepath_or_buffer=data_url)

# Preview the first five rows as the cell's rich output.
df.head(n=5)

In [None]:
# Box plot: distribution of CBO for each WarningBlocker value.
box_fig, box_ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=df, x='WarningBlocker', y='CBO', ax=box_ax)
box_ax.set_title('Box Plot')
plt.show()

In [None]:
# Bar chart: seaborn's aggregated CBO (with its default estimator) for each
# WarningCritical value.
bar_fig, bar_ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=df, x='WarningCritical', y='CBO', ax=bar_ax)
bar_ax.set_title('Bar Chart')
plt.show()

In [None]:
# Control chart: CBO plotted against WarningMinor parsed as datetimes.
#
# The datetime conversion is done on a plotting-only frame instead of being
# assigned back into `df`. Overwriting df['WarningMinor'] would change that
# column's dtype for every later cell that reads `df` (for example the
# df.corr() cells), so the shared frame is left untouched here.
# NOTE(review): 'WarningMinor' looks like a warning count rather than a
# timestamp - confirm that pd.to_datetime is really the intended x-axis.
control_chart_df = df[['WarningMinor', 'CBO']].assign(
    WarningMinor=lambda frame: pd.to_datetime(frame['WarningMinor'])
)

plt.figure(figsize=(12, 8))
sns.lineplot(x='WarningMinor', y='CBO', data=control_chart_df)
plt.title('Control Chart')
plt.show()

In [None]:
# Scatter plot: one point per row, CBO on the x-axis and DIT on the y-axis.
scatter_fig, scatter_ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(data=df, x='CBO', y='DIT', ax=scatter_ax)
scatter_ax.set_title('Scatter Plot')
plt.show()

In [None]:
# Pairwise Pearson correlation between the numeric columns of `df`.
#
# Since pandas 2.0, DataFrame.corr() defaults to numeric_only=False and raises
# when the frame holds non-numeric columns (strings, datetimes). Restricting
# the frame to numeric/bool columns first keeps this cell working on every
# pandas version and matches what older versions computed by default.
correlation_matrix = df.select_dtypes(include=['number', 'bool']).corr()

correlation_matrix

In [None]:
# Pairwise Spearman (rank-based) correlation between the numeric columns of
# `df`; rank correlation is less sensitive to outliers than Pearson.
#
# Since pandas 2.0, DataFrame.corr() defaults to numeric_only=False and raises
# when the frame holds non-numeric columns (strings, datetimes). Restricting
# the frame to numeric/bool columns first keeps this cell working on every
# pandas version and matches what older versions computed by default.
robust_corr_matrix = df.select_dtypes(include=['number', 'bool']).corr(method='spearman')

robust_corr_matrix

In [None]:
# Simple ordinary-least-squares regression of DIT on CBO.
# `X` stays a one-column DataFrame (2-D) and `y` a Series; both are reused by
# the regression cells further down.
y = df['DIT']
X = df[['CBO']]

# fit() returns the fitted estimator, so construction and fitting can be chained.
model = LinearRegression().fit(X, y)

print('Intercept:', model.intercept_)
print('Coefficient:', model.coef_[0])

In [None]:
# Same DIT ~ CBO regression, fitted with scikit-learn's HuberRegressor at its
# default settings. Relies on `X` and `y` from the previous regression cell.
huber_model = HuberRegressor().fit(X, y)

print('Intercept (Robust):', huber_model.intercept_)
print('Coefficient (Robust):', huber_model.coef_[0])

In [None]:
# Multiple linear regression: DIT explained by CBO, NII and LOC together.
# The target `y` comes from the simple-regression cell above.
X_multi = df[['CBO', 'NII', 'LOC']]
multi_model = LinearRegression().fit(X_multi, y)

# Report the fitted intercept and the per-feature coefficients
# (in the column order of X_multi).
print('Intercept (Multivariate):', multi_model.intercept_)
print('Coefficients (Multivariate):', multi_model.coef_)

In [None]:
# Columns fed into the principal component analysis.
features = ['CBO', 'NII', 'LOC']

# Standardise each feature: subtract its mean and divide by its standard
# deviation (pandas' default .std()).
df_std = (
    df[features]
    .sub(df[features].mean())
    .div(df[features].std())
)

# Project the standardised features onto the first two principal components.
pca = PCA(n_components=2)
principal_components = pca.fit_transform(df_std)

# Wrap the component scores in a labelled DataFrame (fresh default index).
pc_df = pd.DataFrame(principal_components, columns=['PC1', 'PC2'])

# Preview the first five rows of component scores.
pc_df.head()