# Class Distribution

In [None]:
import pandas as pd
import os

# Directory holding partition<N>_labels.csv. The original per-user absolute path
# stays as the default so existing runs are unaffected; set SWANSF_LABELS_DIR to
# point the notebook at a different location without editing code.
data_dir = os.environ.get(
    "SWANSF_LABELS_DIR",
    "/Users/samskanderi/Documents/Research_Project/SWANSF/code/labels/",
)

# labels[k] is the label table read for partition k+1 (partitions 1..5).
# os.path.join tolerates a data_dir given with or without a trailing slash.
labels = [
    pd.read_csv(os.path.join(data_dir, "partition" + str(i) + "_labels.csv"))
    for i in range(1, 6)
]

In [None]:
# Create dictionary for counts of each flare type
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

# Per-partition sample counts for each flare class: flare_types[cls][k] is the
# number of rows in labels[k] (partition k+1) assigned to class `cls`.
flare_types = {"X": [0,0,0,0,0], "M": [0,0,0,0,0], "C": [0,0,0,0,0], "B": [0,0,0,0,0], "FQ": [0,0,0,0,0]}
# One bar colour per class, in the same order as the keys of flare_types.
color = ['#f95a00','#d1bc3f','#3b657b','#6590a6','#a6c5d5']
plt.style.use('seaborn-v0_8')


font = {'family' : 'Tahoma',
        'weight' : 'normal',
        'size'   : 8}

matplotlib.rc('font', **font)

# A FLARE_TYPE value is assigned to the first of these substrings it contains;
# the order matters only for values that contain more than one of them.
match_order = ("M", "X", "C", "B", "FQ")

for i in range(1,6):

    # Sort each row's flare type into M, X, C, B, or FQ; otherwise skip the row.
    for j in labels[i-1]['FLARE_TYPE']:
        # Missing values are read by pandas as float NaN; a substring test on
        # them would raise TypeError, so treat them like any unrecognised value.
        if not isinstance(j, str):
            continue
        for key in match_order:
            if key in j:
                flare_types[key][i-1] += 1
                break


group = ("Partition1", "Partition2", "Partition3", "Partition4", "Partition5")

x = np.arange(len(group))  # the label locations
width = 0.10  # the width of the bars
bar_step = width * 1.7  # distance between neighbouring bars within one group
multiplier = 0

# A single Axes has nothing to share a y-axis with, so `sharey` is not passed.
fig, ax = plt.subplots(layout='constrained', figsize=(9, 4))

for attribute, measurement in flare_types.items():
    offset = bar_step * multiplier
    rects = ax.bar(x + offset, measurement, width, label=attribute, color = color[multiplier] )
    ax.bar_label(rects, padding=3)
    multiplier += 1

# Add some text for labels, title and custom x-axis tick labels, etc.
ax.set_ylabel('Sample Size')
ax.set_title('SWAN-SF Class Distribution')
# Bars of one group sit at x, x + bar_step, ..., x + (n - 1) * bar_step, so the
# group's centre (where the partition name belongs) is half of that span.
ax.set_xticks(x + bar_step * (len(flare_types) - 1) / 2, group)
ax.legend(loc='upper left', ncols=5)
# Fixed upper limit; NOTE(review): presumably chosen to leave headroom for the
# legend and bar labels -- bars above 90000 would be clipped.
ax.set_ylim(0, 90000)

plt.show()

# Attributes Visualization

In [None]:
# Pick one sample row per flare class from the first label table (labels[0]).
# sample_flare_index holds the chosen row numbers in the order X, M, C, B, FQ.
#
# Found classes are tracked in a separate set rather than by testing the stored
# index against 0: row 0 is a legitimate sample, and using 0 as the "not found"
# marker kept the search running (and overwriting the other indices) whenever a
# class's sample was the very first row.
#
# NOTE(review): this cell reads the 'FLARE_CLASS' column while the class
# distribution cell reads 'FLARE_TYPE' -- confirm both columns exist.
class_slot = {'X': 0, 'M': 1, 'C': 2, 'B': 3, 'FQ': 4}
sample_flare_index = [0,0,0,0,0]
found_classes = set()

for i, flare_class in enumerate(labels[0]['FLARE_CLASS']):
    slot = class_slot.get(flare_class)
    if slot is None:
        # Not one of the five classes of interest.
        continue

    # As before, a later row of an already-seen class replaces the earlier one
    # until every class has been seen at least once.
    sample_flare_index[slot] = i
    found_classes.add(flare_class)

    if len(found_classes) == len(class_slot):
        break

missing_classes = [name for name in class_slot if name not in found_classes]
if missing_classes:
    # The index of a missing class stays 0, which would silently plot row 0
    # under the wrong class name; make that visible.
    print('Warning: no sample found for class(es): ' + ', '.join(missing_classes))
       

In [None]:
from scipy.stats import zscore
from sklearn.impute import KNNImputer
# Shared imputer (k-nearest-neighbours, k=2) that plot_time_series_class uses
# to fill missing values before standardising a series.
imputer = KNNImputer(n_neighbors=2)

# Legend text and line colour for each plotted series; the two lists are
# index-aligned, so position k of `color` styles the series named label[k].
label = ['X', 'M', 'C', 'B', 'FQ']
color = [
    '#f95a00',
    '#d1bc3f',
    '#3b657b',
    '#6590a6',
    '#a6c5d5',
]

def plot_time_series_class(data, class_name, ax, n_steps=10):
    """Plot five imputed, standardised and smoothed series on one Axes.

    Columns 0..4 of ``data`` are each processed independently: missing values
    are filled with the module-level ``imputer``, the result is z-scored, a
    rolling mean over ``n_steps`` points is taken, and the smoothed curve is
    drawn on ``ax`` using the module-level ``label`` and ``color`` lists.

    Parameters
    ----------
    data : array-like accepted by ``pd.DataFrame``
        Must provide at least columns 0..4, one per plotted series.
    class_name : str
        Text used as the y-axis label of ``ax``.
    ax : matplotlib Axes
        Target axes; a legend is added after plotting.
    n_steps : int, default 10
        Window length of the rolling mean.
    """
    frame = pd.DataFrame(data)

    for column in range(5):
        # The imputer works on 2-D input, so present the column as (n_rows, 1)
        # and take the single output column back out afterwards.
        column_2d = frame[column].to_numpy().reshape(len(frame[column]), 1)
        filled = imputer.fit_transform(column_2d)[:, 0]
        frame[column] = zscore(pd.DataFrame(filled))

        smoothed = frame[column].rolling(n_steps).mean()
        ax.plot(
            smoothed,
            linestyle='-',
            linewidth=1,
            label=label[column],
            color=color[column],
        )

    ax.set_ylabel(class_name)
    ax.legend()

In [None]:
import matplotlib
import matplotlib.pyplot as plt
import numpy as np

plt.style.use('seaborn-v0_8')


font = {'family' : 'Tahoma',
        'weight' : 'normal',
        'size'   : 8}

matplotlib.rc('font', **font)

# Names of the 24 plotted attributes, one subplot each (12 rows x 2 columns).
# Entry k is drawn from column k+1 of each sample; NOTE(review): this assumes
# the dataset's columns follow exactly this order after the timestamp -- confirm.
abt_header = ['R_VALUE','TOTUSJH','TOTBSQ','TOTPOT','TOTUSJZ','ABSNJZH','SAVNCPP',
                           'USFLUX','TOTFZ','MEANPOT', 'EPSX', 'EPSY','EPSZ','MEANSHR','SHRGT45','MEANGAM',
                              'MEANGBT','MEANGBZ','MEANGBH','MEANJZH','TOTFY','MEANJZD','MEANALP','TOTFX']

fig, axs = plt.subplots(12,2, sharey=True,
  figsize=(10, 36))

# Class names in the same order as the entries of sample_flare_index.
flare_class_names = ('X', 'M', 'C', 'B', 'FQ')


def _format_timestamp(raw):
    """Render a numeric timestamp as 'YYYY-MM-DD hh:mm' by slicing its digits."""
    # NOTE(review): presumably the value reads YYYYMMDDhhmm... when printed -- confirm.
    text = str(raw)
    return text[0:4] + '-' + text[4:6] + '-' + text[6:8] \
                + ' ' + text[8:10] + ":" + text[10:12]


# One 60 x 25 block per class: axis 0 is the time step, axis 1 is the column
# (column 0 is read as the timestamp below), axis 2 is the class.
sample_flare_mvts = np.zeros((60,25,5))

for i in range(0,5):
    # NOTE(review): `data` is not defined in the cells shown here; presumably
    # data[0] is the first partition with samples along the last axis -- confirm.
    sample_flare_mvts[:,:, i] = data[0][:,:, sample_flare_index[i]]

    timestamp = sample_flare_mvts[:,0, i]

    start_time = _format_timestamp(timestamp[0])
    end_time = _format_timestamp(timestamp[59])

    print('Class: ' + flare_class_names[i])
    print('Start Time: ' + start_time + '  and  End Time: ' + end_time)


for i, cls in enumerate(abt_header):
    ax = axs.flat[i]
    # Use a dedicated name here: rebinding `data` would replace the dataset the
    # loop above reads, so the cell could not be run a second time.
    attribute_series = sample_flare_mvts[:,i+1,:]

    plot_time_series_class(attribute_series, cls, ax)

fig.tight_layout();
    