## Do general imports

In [None]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import math as math
from sklearn import preprocessing
from feature_engine.discretisation import ArbitraryDiscretiser

import preprocessing.DiscretizeTotalProcessingStepsPreProcess as dtpsp
import preprocessing.ProjectsPreProcess as ppp
import exploration.analytics_plots as vs

# Show every column when a DataFrame is displayed instead of truncating wide frames.
pd.set_option('display.max_columns', None)


## Load Datasets

In [None]:
# Load the exported issues, using the "id" column as the index.
issues_df = pd.read_csv('./temp_data/issues.csv', index_col=["id"])
# Convert creation timestamps to datetimes so they can be compared as dates.
issues_df['issue_created'] = pd.to_datetime(issues_df['issue_created'])
print(f'Total records in dataset {len(issues_df)}')

In [None]:
# Issue types kept for the analysis.
types = ['Ticket', 'Deployment', 'HD Service']

# Row mask: project key starts with two word characters, two digits and at
# least one more word character; issue type is one of `types`; the issue was
# created during 2022; and it has a resolution date.
in_scope = (
    # Raw string avoids invalid-escape warnings; na=False treats a missing
    # project key as "no match" instead of propagating NaN into the mask.
    issues_df['issue_proj'].str.match(r'\w{2}\d{2}\w{1,}', na=False)
    & issues_df['issue_type'].isin(types)
    & (issues_df['issue_created'] >= '2022-01-01')
    # Exclusive upper bound: '<= 2022-12-31' compares against midnight and
    # would drop issues created later on 31 December.
    & (issues_df['issue_created'] < '2023-01-01')
    & issues_df['issue_resolution_date'].notna()
    # Optional extra filter (currently disabled):
    # & (issues_df['issue_priority'] == 'High')
)
# Copy so later column assignments operate on an independent frame rather
# than on a slice of the unfiltered data.
issues_df = issues_df[in_scope].copy()
print(f'Total records after filter {len(issues_df)}')

issues_df.head(1)

In [None]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Numeric issue metrics that feed the PCA.
pca_columns = ['wf_total_time', 'processing_steps', 'issue_contr_count', 'issue_comments_count']

# Standardise every metric to zero mean / unit variance. StandardScaler works
# column by column, so one call on the whole frame equals scaling each column
# separately. The result goes into a new DataFrame (same index and column
# names) instead of being written back into a slice of issues_df, which
# avoids chained-assignment warnings and int -> float dtype clashes.
scaler = StandardScaler()
features = pd.DataFrame(
    scaler.fit_transform(issues_df[pca_columns]),
    columns=pca_columns,
    index=issues_df.index,
)

# NOTE(review): 0.54616479 + 0.26273805 + 0.1083671 + 0.08273006 -- presumably
# the explained-variance ratios from an earlier 4-component run; confirm.
# 'mle' lets PCA choose the number of components itself (needs the full SVD
# solver), so the number of columns in x_r depends on the data.
pca = PCA(n_components='mle', svd_solver='full')
# Alternative: keep all four components.
# pca = PCA(n_components=4)
fitted = pca.fit(features)  # fit() returns the estimator itself
x_r = fitted.transform(features)  # samples projected onto the kept components
print(f'Variance: {fitted.explained_variance_}') # eigenvalues
print(f'Variance Ratio: {fitted.explained_variance_ratio_}') # share of variance per component
print(f'Components: \n{fitted.components_}') # eigenvectors, shape (n_components, n_features); 4 features here

# print(f'features in: {fitted.feature_names_in_}')
# print(f'features out: {fitted.get_feature_names_out()}')
print(f'covariance:\n{fitted.get_covariance()}')

In [None]:
import matplotlib.pyplot as plt

# PCA(n_components='mle') keeps a data-dependent number of components, so make
# sure three exist before indexing them for the 3-D scatter plot.
if x_r.shape[1] < 3:
    raise ValueError(
        f'3D scatter needs at least 3 principal components, got {x_r.shape[1]}'
    )

ax = plt.figure(figsize=(8, 8)).add_subplot(projection='3d')

ax.view_init(elev=20, azim=-15)  # camera elevation / azimuth in degrees
ax.set_xlabel('PC1')
ax.set_ylabel('PC2')
ax.set_zlabel('PC3')
# Plot every issue at its coordinates on the first three principal components.
ax.scatter(x_r[:, 0], x_r[:, 1], x_r[:, 2])
