In [None]:
import pandas as pd
import numpy as np
import glob
import os
import re
from scipy.stats import entropy
import plotly.express as px
import plotly.graph_objects as go

In [None]:
# Read one individual run (the 1SC file) into its own frame for a quick look
# before the full set of runs is loaded.
df_isc_part1 = pd.read_csv(
    filepath_or_buffer="/content/drive/MyDrive/Thesis/ISC Part1/RCS_250407_1430_5S2P_1SC_Run1_1.csv"
)

In [None]:
# Number of rows in the single-run frame.
df_isc_part1.shape[0]

60598

In [None]:
# Load every "Run1_1" CSV of the 5S2P series and stack them into one frame,
# labelling each row with the tag parsed from its file name (column "SC").
csv_pattern = "/content/drive/MyDrive/Thesis/ISC Part1/RCS_*_5S2P_*_Run1_1.csv"

# glob returns matches in arbitrary (filesystem-dependent) order; sorting keeps
# the row order and index of df_all reproducible from run to run.
csv_files = sorted(glob.glob(csv_pattern))

# Fail early with a message that names the pattern, instead of letting
# pd.concat raise "No objects to concatenate" further down.
if not csv_files:
    raise FileNotFoundError(f"No CSV files matched pattern: {csv_pattern}")

df_list = []

for file in csv_files:
    # The tag is the text between "5S2P_" and "_Run1_1.csv" (e.g. "1SC", "4p7SC").
    # The glob pattern is looser than this regex, so a file can match the glob
    # but not the regex; such files are labelled "unknown".
    match = re.search(r"RCS_\d+_\d+_5S2P_(.*?)_Run1_1\.csv", os.path.basename(file))
    tag = match.group(1) if match else "unknown"

    df = pd.read_csv(file)
    df["SC"] = tag

    df_list.append(df)

# ignore_index=True gives the combined frame a fresh 0..N-1 index.
df_all = pd.concat(df_list, ignore_index=True)

print(df_all.head())
print("Total files loaded:", len(df_list))

   Time       V1        V2        V3        V4        V5      IApp       DV1  \
0  1.00  8.14142  8.012606  8.006502  8.142005  8.142474 -0.024797  0.128814   
1  1.05  8.14142  8.012353  8.006249  8.141751  8.143113 -0.026511  0.129067   
2  1.10  8.14142  8.012606  8.006502  8.142005  8.142474 -0.024797  0.128814   
3  1.15  8.14142  8.012353  8.006249  8.141751  8.143113 -0.026511  0.129067   
4  1.20  8.14142  8.012606  8.006502  8.142005  8.142474 -0.024797  0.128814   

        DV2       DV3       DV4       DV5        DT  FaultIN   SC  
0  0.006104 -0.135503 -0.000470  0.001054  0.021406        0  1SC  
1  0.006104 -0.135502 -0.001361  0.001693  0.021406        0  1SC  
2  0.006104 -0.135503 -0.000470  0.001054  0.021406        0  1SC  
3  0.006104 -0.135502 -0.001361  0.001693  0.021406        0  1SC  
4  0.006104 -0.135503 -0.000470  0.001054  0.021406        0  1SC  
Total files loaded: 8


In [None]:
# Display the combined frame; pandas elides the middle rows of a long frame,
# so only the first and last few rows are shown.
df_all

Unnamed: 0,Time,V1,V2,V3,V4,V5,IApp,DV1,DV2,DV3,DV4,DV5,DT,FaultIN,SC
0,1.00,8.141420,8.012606,8.006502,8.142005,8.142474,-0.024797,0.128814,0.006104,-0.135503,-0.000470,0.001054,0.021406,0,1SC
1,1.05,8.141420,8.012353,8.006249,8.141751,8.143113,-0.026511,0.129067,0.006104,-0.135502,-0.001361,0.001693,0.021406,0,1SC
2,1.10,8.141420,8.012606,8.006502,8.142005,8.142474,-0.024797,0.128814,0.006104,-0.135503,-0.000470,0.001054,0.021406,0,1SC
3,1.15,8.141420,8.012353,8.006249,8.141751,8.143113,-0.026511,0.129067,0.006104,-0.135502,-0.001361,0.001693,0.021406,0,1SC
4,1.20,8.141420,8.012606,8.006502,8.142005,8.142474,-0.024797,0.128814,0.006104,-0.135503,-0.000470,0.001054,0.021406,0,1SC
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
544503,3654.90,7.407145,7.201145,7.194736,7.409281,7.409892,-0.023823,0.206000,0.006409,-0.214545,-0.000611,0.002747,1.025004,0,10SC
544504,3654.95,7.406611,7.200611,7.194202,7.408747,7.409892,-0.023823,0.206000,0.006409,-0.214545,-0.001145,0.003281,1.021359,0,10SC
544505,3655.00,7.406408,7.200408,7.193999,7.408544,7.409892,-0.023823,0.206000,0.006409,-0.214545,-0.001348,0.003484,1.021359,0,10SC
544506,3655.05,7.406535,7.200535,7.194126,7.408671,7.409944,-0.023823,0.206000,0.006409,-0.214545,-0.001273,0.003409,1.021359,0,10SC


In [None]:
# Row count per SC tag, largest first.
df_all["SC"].value_counts()

Unnamed: 0_level_0,count
SC,Unnamed: 1_level_1
22SC,75503
51SC,74296
8p2SC,74253
10SC,73083
6p8SC,69289
100SC,65596
1SC,60598
4p7SC,51890


In [None]:
# One correlation heatmap per SC tag, in order of first appearance in df_all.
for sc_value in df_all["SC"].unique():
    # Restrict to the rows of this tag, then correlate the numeric columns only
    # (the string "SC" column is excluded by numeric_only=True).
    df_sc = df_all.loc[df_all["SC"].eq(sc_value)]
    corr_matrix = df_sc.corr(numeric_only=True)

    fig = px.imshow(
        corr_matrix,
        labels={"x": "Variable", "y": "Variable", "color": "Correlation"},
        title=f"Correlation Heatmap for SC = {sc_value}",
        color_continuous_scale="RdBu_r",
        text_auto=True,
    )

    # Fixed canvas size; slanted x tick labels.
    fig.update_layout(xaxis_tickangle=45, height=700, width=800)

    fig.show()


In [None]:
# Time-series overview per SC tag: every signal column as a line, with the
# periods where FaultIN == 1 highlighted in red.
# (plotly.graph_objects is already imported as `go` in the imports cell.)


def _fault_intervals(times, flags):
    """Collapse rows where ``flags == 1`` into contiguous (start, end) time pairs.

    Parameters
    ----------
    times : pandas.Series
        Time value of each row.
    flags : pandas.Series
        Flag of each row, aligned with ``times``; a row is active when it equals 1.

    Returns
    -------
    list of tuple
        One ``(first_time, last_time)`` pair per run of consecutive active rows,
        in row order. Empty when no row is active. A run of a single row gives
        a pair whose two values are equal.
    """
    active = flags.eq(1).to_numpy()
    if not active.any():
        return []

    stamps = times.tolist()
    # Pad with False on both sides so runs touching the first or last row still
    # produce a rising and a falling edge.
    padded = np.concatenate(([False], active, [False]))
    edges = np.flatnonzero(padded[1:] != padded[:-1])
    # Edges alternate rising/falling: a rising edge is the index of the first
    # active row of a run, a falling edge is one past its last active row.
    return [(stamps[lo], stamps[hi - 1]) for lo, hi in zip(edges[::2], edges[1::2])]


# Everything except the time axis, the fault flag and the tag is plotted.
variables_to_plot = [col for col in df_all.columns if col not in ["Time", "FaultIN", "SC"]]

for sc_value in df_all["SC"].unique():
    print("SC value:", sc_value)
    df_sc = df_all[df_all["SC"] == sc_value]

    fig = go.Figure()

    for var in variables_to_plot:
        fig.add_trace(go.Scatter(
            x=df_sc["Time"],
            y=df_sc[var],
            mode='lines',
            name=var
        ))

    # One rectangle per contiguous fault period instead of one vertical line per
    # fault sample: a sustained fault would otherwise add a layout shape for
    # every sample, which is slow to build and render. A zero-width rectangle
    # (single-sample fault) still shows as a dashed red line via its border.
    for fault_start, fault_end in _fault_intervals(df_sc["Time"], df_sc["FaultIN"]):
        fig.add_vrect(
            x0=fault_start,
            x1=fault_end,
            fillcolor="rgba(255, 0, 0, 0.15)",
            line=dict(color="red", width=1, dash="dash")
        )

    fig.update_layout(
        title=f"Variables Over Time with Fault Markers (SC = {sc_value})",
        xaxis_title="Time (seconds)",
        yaxis_title="Value",
        legend=dict(orientation="h"),
        height=600
        )

    fig.show()


SC value: 1SC
