# Uvoz podatko v PANAS in osnovna analiza

V tej beležnici bomo uvozili podatke iz datotek `Sx_quest.csv`, ki vsebuje rezultate vprašalnika PANAS, ter izvedli nekaj osnovnih analiz.

## PANAS lestvica
PANAS (Positive and Negative Affect Schedule) je vprašalnik za merjenje pozitivnih in negativnih občutkov. Običajno uporablja lestvico od 1 do 5, kjer:
- **1**: Zelo malo ali sploh ne
- **2**: Malo
- **3**: Zmerno
- **4**: Kar precej
- **5**: Zelo

V vaši datoteki, "PANAS 1 do 5" predstavlja odgovore na določena vprašanja, kjer vsako število označuje stopnjo intenzivnosti občutka za posamezno vprašanje.

## Nalaganje podatkov v Pandas DataFrame 

In [1]:
import pandas as pd
import os
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from bokeh.plotting import figure, show, output_notebook
from bokeh.layouts import gridplot
from bokeh.models import ColumnDataSource

# Omogočimo prikaz v Jupyter Notebooku
output_notebook()

# Osnovna mapa, kjer se nahajajo mape S2 do S17
base_dir = r"C:\Users\user\Desktop\WESAD_VSE"

# Seznam identifikatorjev oseb od 2 do 17
osebni_ids = range(2, 18)

# Prazen seznam za shranjevanje PANAS podatkov
panas_data = []

# Preberemo vsako datoteko in izvlečemo PANAS podatke
for person_id in osebni_ids:
    folder_name = f"S{person_id}"
    file_name = f"S{person_id}_quest.csv"
    file_path = os.path.join(base_dir, folder_name, file_name)
    try:
        with open(file_path, 'r') as file:
            for line in file:
                line = line.strip()
                if line.startswith('# PANAS'):
                    # Odstranimo '# PANAS' in razdelimo preostanek vrstice
                    data_values = line.replace('# PANAS', '').strip().split(';')
                    # Odstranimo morebitne prazne vrednosti
                    data_values = [value for value in data_values if value]
                    # Pretvorimo podatke v števila, upoštevamo 'Nan' kot None
                    data_values = [float(value) if value != 'Nan' else None for value in data_values]
                    # Dodamo 'Person' stolpec
                    data_values.append(person_id)
                    panas_data.append(data_values)
    except FileNotFoundError:
        print(f"Datoteka ne obstaja: {file_path}")
    except Exception as e:
        print(f"Napaka pri branju {file_path}: {e}")

# Ustvarimo DataFrame iz PANAS podatkov
if panas_data:
    max_columns = max(len(row) for row in panas_data)
    column_names = [f'PANAS_{i+1}' for i in range(max_columns - 1)] + ['Person']

    # Ustvarimo DataFrame
    panas_df = pd.DataFrame(panas_data, columns=column_names)

    # Prikaz prvih nekaj vrstic združenega DataFrame-a
    print(panas_df.head())
else:
    print("Seznam panas_data je prazen.")



Datoteka ne obstaja: C:\Users\user\Desktop\WESAD_VSE\S12\S12_quest.csv
   PANAS_1  PANAS_2  PANAS_3  PANAS_4  PANAS_5  PANAS_6  PANAS_7  PANAS_8  \
0      1.0      1.0      3.0      2.0      1.0      3.0      1.0      1.0   
1      3.0      2.0      4.0      1.0      3.0      3.0      1.0      2.0   
2      1.0      1.0      2.0      3.0      1.0      2.0      1.0      1.0   
3      1.0      1.0      2.0      3.0      1.0      1.0      1.0      1.0   
4      1.0      1.0      1.0      2.0      1.0      1.0      1.0      1.0   

   PANAS_9  PANAS_10  ...  PANAS_18  PANAS_19  PANAS_20  PANAS_21  PANAS_22  \
0      1.0       2.0  ...       4.0       2.0       2.0       2.0       1.0   
1      1.0       4.0  ...       4.0       2.0       3.0       3.0       3.0   
2      1.0       1.0  ...       3.0       1.0       1.0       1.0       1.0   
3      1.0       1.0  ...       3.0       1.0       1.0       1.0       2.0   
4      1.0       1.0  ...       2.0       1.0       1.0       1.0      

## Spoznavanje, vizualizacija signalov 

In [2]:


# List of signal columns (exclude 'Person')
signals = panas_df.columns.drop('Person')

# Plot histograms of signals using Bokeh
plots = []
for signal in signals:
    p = figure(title=f'Distribution of {signal}', x_axis_label=signal, y_axis_label='Frequency', width=400, height=400)
    hist, edges = np.histogram(panas_df[signal].dropna(), bins=30)
    p.quad(top=hist, bottom=0, left=edges[:-1], right=edges[1:], fill_color="navy", line_color="white", alpha=0.5)
    plots.append(p)

# Arrange plots in a grid
grid = gridplot(plots, ncols=3)
show(grid)





## Skaliranje vrednosti značilk (normalizacija) 

In [3]:


# Separate features and labels
X = panas_df.drop(['Person'], axis=1)

# Normalize features
scaler = MinMaxScaler()
X_normalized = scaler.fit_transform(X)

# Convert back to DataFrame
X_normalized = pd.DataFrame(X_normalized, columns=X.columns)

# Combine normalized features with labels
data_normalized = pd.concat([X_normalized, panas_df['Person']], axis=1)
print(data_normalized.head())

   PANAS_1   PANAS_2  PANAS_3  PANAS_4  PANAS_5   PANAS_6  PANAS_7  PANAS_8  \
0      0.0  0.000000     0.50     0.25      0.0  0.666667      0.0     0.00   
1      0.5  0.333333     0.75     0.00      0.5  0.666667      0.0     0.25   
2      0.0  0.000000     0.25     0.50      0.0  0.333333      0.0     0.00   
3      0.0  0.000000     0.25     0.50      0.0  0.000000      0.0     0.00   
4      0.0  0.000000     0.00     0.25      0.0  0.000000      0.0     0.00   

   PANAS_9  PANAS_10  ...  PANAS_18  PANAS_19  PANAS_20  PANAS_21  PANAS_22  \
0      0.0      0.25  ...      0.75  0.333333  0.333333      0.25      0.00   
1      0.0      0.75  ...      0.75  0.333333  0.666667      0.50      0.50   
2      0.0      0.00  ...      0.50  0.000000  0.000000      0.00      0.00   
3      0.0      0.00  ...      0.50  0.000000  0.000000      0.00      0.25   
4      0.0      0.00  ...      0.25  0.000000  0.000000      0.00      0.00   

   PANAS_23  PANAS_24  PANAS_25  PANAS_26  Person 

## Vizualizacija značilk v času ter glede na stanje 

In [4]:

# Add a time index if not present
data_normalized['Time'] = data_normalized.index

# vizualizacijo  značilk v stanju ne točno razumem, zato sem jo zakomentiral
'''  
# Plot features over time colored by state (Person) using Bokeh
plots = []
for feature in X.columns:
    p = figure(title=f'{feature} Over Time by Person', x_axis_label='Time', y_axis_label=feature, width=400, height=400)
    for person_id in data_normalized['Person'].unique():
        person_data = data_normalized[data_normalized['Person'] == person_id]
        if not person_data.empty:
            p.line(person_data['Time'], person_data[feature], legend_label=f'Person {person_id}', line_width=2)
    plots.append(p)

# Arrange plots in a grid
grid = gridplot(plots, ncols=3)
show(grid)
'''


"  \n# Plot features over time colored by state (Person) using Bokeh\nplots = []\nfor feature in X.columns:\n    p = figure(title=f'{feature} Over Time by Person', x_axis_label='Time', y_axis_label=feature, width=400, height=400)\n    for person_id in data_normalized['Person'].unique():\n        person_data = data_normalized[data_normalized['Person'] == person_id]\n        if not person_data.empty:\n            p.line(person_data['Time'], person_data[feature], legend_label=f'Person {person_id}', line_width=2)\n    plots.append(p)\n\n# Arrange plots in a grid\ngrid = gridplot(plots, ncols=3)\nshow(grid)\n"

## Identifikacija, katere značilke so najbolj korelirane s ciljnim stanjem

In [5]:
# 4. Identifying Features Most Correlated with the Target State

# Calculate correlation matrix
correlation_matrix = data_normalized.corr()

# Get correlations with the target variable
target_correlations = correlation_matrix['Person'].drop('Person')

# Sort features by correlation absolute value
top_features = target_correlations.abs().sort_values(ascending=False)

# Display top correlated features
print("Top features correlated with the target state:")
print(top_features.head(10))

Top features correlated with the target state:
Time        0.997281
PANAS_21    0.625991
PANAS_7     0.534297
PANAS_9     0.525901
PANAS_3     0.508694
PANAS_24    0.501713
PANAS_8     0.470010
PANAS_6     0.401062
PANAS_16    0.361093
PANAS_12    0.350325
Name: Person, dtype: float64


## Dodatne časovne značilke 

Izdelava in vizualizacija 

tukaj mi tudi ni jasno kaj naredit