In [None]:
import csv
import io
import os
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from functools import partial

import chardet
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
import seaborn as sns
import streamlit as st
# Set the standard-library csv module's maximum field size limit to 1e9.
csv.field_size_limit(int(1e9)) 

In [None]:
def processed_file_analysis(filename):
    """
    Load a processed CSV sheet and derive per-row timing columns.

    :param filename: Path of the CSV file WITHOUT the '.csv' extension
        (the extension is appended here).
    :return: DataFrame in which the timestamp column is renamed to 'Timing'
        and parsed with the format '%Y-%m-%d %H:%M:%S:%f', plus two derived
        columns:
        'Milliseconds' - millisecond part of each timestamp;
        'Time Diff'    - difference between consecutive 'Milliseconds'
                         values; the first row and every negative difference
                         (e.g. the millisecond part wrapping around at a
                         second boundary) are set to 10.0.
    :raises KeyError: if the file has no 'A', 'A (String)' or 'Timing' column.
    """
    df = pd.read_csv(filename + '.csv')

    # This function originally looked only for a column named 'A', but
    # process_dataset writes the same column as 'A (String)'. Accept either
    # name so the sheet produced by process_dataset can be loaded.
    for timestamp_column in ('A', 'A (String)'):
        if timestamp_column in df.columns:
            df = df.rename(columns={timestamp_column: 'Timing'})
            break

    df['Timing'] = pd.to_datetime(df['Timing'], format='%Y-%m-%d %H:%M:%S:%f')
    df['Milliseconds'] = df['Timing'].dt.microsecond // 1000

    # The first row has no predecessor, so its difference defaults to 10.0.
    df['Time Diff'] = df['Milliseconds'].diff().fillna(10.0)
    # A negative difference is replaced by the same default of 10.0.
    df.loc[df['Time Diff'] < 0, 'Time Diff'] = 10.0

    return df

def process_row(df_row, S5=0):
    """
    Classify one row of readings into a body-position class.

    :param df_row: Indexable row (pandas Series with integer labels, list or
        tuple). Element 0 is passed through unchanged as ``A``; elements 1-3
        are converted to floats ``B``, ``C`` and ``D``.
    :param S5: Offset added to the ``angle_360`` thresholds (default 0).
        NOTE(review): the name looks like a spreadsheet cell reference -
        confirm the intended meaning with the author.
    :return: A 10-element list
        ``[A, B, C, D, angle_360, angle_updown, body_rotation,
        prone_sit_class, supine_recline_class, overall_class]``.
        * If B, C or D is missing, non-numeric or not finite (NaN/inf), every
          element after ``A`` is None.
        * If B, C and D are all zero, the three readings are returned and the
          six derived elements are None.
    """
    A = df_row[0]
    try:
        B, C, D = map(float, df_row[1:4])
    except (ValueError, TypeError):
        # ValueError: non-numeric text or fewer than three readings.
        # TypeError: a reading that float() cannot accept, e.g. None.
        return [A] + [None] * 9

    # Empty CSV cells arrive as NaN. float() accepts NaN, and every later
    # comparison with NaN is False, which would silently push the row into
    # the final "sitting" branch. Treat such rows as unparseable instead.
    if not np.isfinite([B, C, D]).all():
        return [A] + [None] * 9

    if B == 0 and C == 0 and D == 0:
        return [A, B, C, D, None, None, None, None, None, None]

    # Both angles are converted from radians to degrees.
    # Note: when B and D are both zero (and C is not), the division below
    # yields NaN for angle_360, and the row is handled by the "prone-sit" branch.
    angle_360 = np.sign(B) * np.arccos(-D / np.sqrt(B**2 + D**2)) * 180 / np.pi + 180
    angle_updown = np.arcsin(C / np.sqrt(B**2 + C**2 + D**2)) * 180 / np.pi
    body_rotation = "supine-recline" if S5 < angle_360 < (S5 + 180) else "prone-sit"

    if body_rotation == "prone-sit":
        if angle_updown > 0:
            prone_sit_class = "prone"
        elif angle_updown > -23:
            prone_sit_class = "prone supported"
        elif angle_updown > -63:
            prone_sit_class = "upright"
        else:
            prone_sit_class = "sitting"
        supine_recline_class = ""
    else:
        if angle_updown > 15:
            supine_recline_class = "upsidedown"
        elif angle_updown < -36:
            supine_recline_class = "reclined"
        elif angle_360 < (S5 + 69):
            supine_recline_class = "left side"
        elif angle_360 > (S5 + 101):
            supine_recline_class = "right side"
        else:
            supine_recline_class = "supine"
        prone_sit_class = ""

    # Exactly one of the two sub-classes is non-empty, so concatenation
    # yields that class.
    overall_class = prone_sit_class + supine_recline_class

    return [A, B, C, D, angle_360, angle_updown, body_rotation, prone_sit_class, supine_recline_class, overall_class]

def process_dataset(file, skiprows=100):
    """
    Classify every row of a raw CSV file and save the result as a new CSV.

    The input is read without a header. Each row is passed to ``process_row``
    and the results are written to ``<file without extension>_processed_sheet.csv``.

    :param file: Name of the CSV file (with .csv extension)
    :param skiprows: Number of leading lines of the file to skip before the
        data starts (default 100, the value that used to be hard-coded).
    :return: Path of the written file WITHOUT the '.csv' extension, i.e. the
        form ``processed_file_analysis`` expects as its argument.
    """
    output_stem = file.rsplit('.', 1)[0] + '_processed_sheet'
    output_file = output_stem + '.csv'

    df = pd.read_csv(file, skiprows=skiprows, header=None)

    # Plain tuples avoid building one pandas Series per row (as a row-wise
    # DataFrame.apply does) and also work when the frame has no rows.
    processed_rows = [process_row(row) for row in df.itertuples(index=False, name=None)]

    processed_df = pd.DataFrame(
        processed_rows,
        columns=['A (String)', 'B', 'C', 'D', '360 angle', 'Up/down angle', 'Body Rotation',
                 'Prone-sit class', 'Supine-recline class', 'Overall class'],
    )
    processed_df.to_csv(output_file, index=False)

    # process_row leaves 'Overall class' empty for rows it could not classify,
    # so only rows that did receive a class are counted as processed.
    classified_rows = int(processed_df['Overall class'].notna().sum())

    print(f"Processing complete. Results saved as {output_file} in the current folder")
    print(f"Total rows read: {len(df)}")
    print(f"Rows successfully processed: {classified_rows}")

    return output_stem

def dataset_description(df):
    """
    Summarise how long each 'Overall class' value lasts.

    Rows whose 'Overall class' is missing are grouped under the label
    'Missing Rows'. A duration is the row count of a class divided by 100.

    :param df: DataFrame with an 'Overall class' column.
    :return: Tuple ``(class_durations, total_duration)`` where
        ``class_durations`` has the columns 'Overall class' and
        'Duration in seconds', and ``total_duration`` is the sum of that
        duration column.
    """
    labels = df['Overall class'].fillna('Missing Rows')

    # After the fill there are no missing labels, so counting the labels
    # within each label group gives the number of rows per class.
    per_class = labels.groupby(labels).count().reset_index(name='Class Count')
    per_class['Duration in seconds'] = per_class['Class Count'] / 100

    class_durations = per_class[['Overall class', 'Duration in seconds']]
    overall_duration = class_durations['Duration in seconds'].sum()

    return class_durations, overall_duration

def create_plot(df):
    """
    Draw a bar chart of the total duration of each 'Overall class' value.

    The per-class durations come from ``dataset_description`` (row count / 100,
    with missing classes shown as 'Missing Rows'). Each bar is annotated with
    its value rounded to two decimals. The figure is shown with ``plt.show()``.

    :param df: DataFrame with an 'Overall class' column.
    :return: None
    """
    # Reuse the shared aggregation instead of repeating it here.
    class_durations, _ = dataset_description(df)
    labels = class_durations['Overall class']
    durations = class_durations['Duration in seconds']

    fig, ax = plt.subplots(figsize=(8, 6))  # Create figure and axes objects
    bars = ax.bar(labels, durations)

    for bar in bars:
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width() / 2, height + 1, round(height, 2), ha='center', va='bottom')

    # Rotate the existing tick labels. set_xticklabels is only meant to be
    # used after the tick positions have been fixed, which is not the case
    # for this categorical axis.
    ax.tick_params(axis='x', labelrotation=45)
    ax.set_yticks([])
    ax.set_xlabel('Category')
    ax.set_ylabel('Duration in Seconds')
    sns.despine(ax=ax, bottom=True, left=True)

    plt.show()

def plot_bins(df, class_name):
    """
    Plot how the rows of one class are distributed over run-duration buckets.

    For every row a running duration is computed: 0 for the first row of a run
    of identical consecutive 'Overall class' values, growing by 10 for each
    further row of the run, divided by 1000. The rows of ``class_name`` are
    then counted per duration bucket and shown as a bar chart.

    The input DataFrame is not modified.

    :param df: DataFrame with an 'Overall class' column.
    :param class_name: Class whose rows are plotted.
    :return: None after plotting; a message string if ``class_name`` has no
        rows or none of its rows falls into a bucket (buckets start at 0.1).
    """
    no_values_message = f"No values for class '{class_name}' exist."

    classes = df['Overall class']
    same_class_mask = classes == classes.shift(1)

    # Work on local Series so the caller's frame does not gain helper columns.
    increment = pd.Series(np.where(same_class_mask, 10, 0), index=df.index)
    run_id = (~same_class_mask).cumsum()  # increases by one whenever a new run starts
    rolling_sum = increment.groupby(run_id).cumsum() / 1000

    class_mask = classes == class_name
    d = df.loc[class_mask].copy()

    # If there are no rows for the given class
    if d.empty:
        return no_values_message

    d['Rolling Sum'] = rolling_sum[class_mask].to_numpy()
    max_val = d['Rolling Sum'].max()

    fixed_bins = [0.1, 0.2, 0.3, 0.4, 0.5, 1]
    variable_bins = np.linspace(1.5, max(max_val, 1.5), num=5)
    bins = np.unique(np.sort(np.concatenate((fixed_bins, variable_bins))))

    d['duration_bin'] = pd.cut(d['Rolling Sum'], bins, include_lowest=True)

    # observed=False keeps empty buckets in the result (count 0), so the chart
    # always shows every bucket regardless of the pandas default.
    cnt_bin = (
        d.groupby(['Overall class', 'duration_bin'], observed=False)
        .size()
        .reset_index(name='bin_count')
    )

    # Check if all bin counts are zero
    if cnt_bin['bin_count'].sum() == 0:
        return no_values_message

    cnt_bin['duration_bin'] = cnt_bin['duration_bin'].astype(str)

    fig, ax = plt.subplots(figsize=(10, 6))

    # Plot against explicit positions so the tick locations are fixed before
    # the tick labels are assigned.
    positions = np.arange(len(cnt_bin))
    bars = ax.bar(positions, cnt_bin['bin_count'])

    for bar in bars:
        yval = bar.get_height()
        ax.text(bar.get_x() + bar.get_width() / 2, yval + 1, round(yval, 2), ha='center', va='bottom')

    ax.set_title(f"Buckets for: {class_name}")
    ax.set_xticks(positions)
    ax.set_xticklabels(cnt_bin['duration_bin'], rotation=45, ha='right')
    ax.set_yticks([])
    ax.set_xlabel('Duration (seconds)')
    ax.set_ylabel('Count')
    sns.despine(ax=ax, bottom=True, left=True)
    plt.tight_layout()

    plt.show()
    
def overall_class_stats(df, overall_class):
    """
    Find the longest run of consecutive rows labelled ``overall_class``.

    Two rows belong to the same run when their index labels differ by exactly
    one, so the frame is expected to have a consecutive integer index.

    :param df: DataFrame with an 'Overall class' column.
    :param overall_class: Class label to look for.
    :return: Tuple ``(row_count, start_index, end_index)`` of the longest run;
        the earliest run wins on ties. ``(0, None, None)`` if no row has the
        requested class.
    """
    class_indices = df[df['Overall class'] == overall_class].index
    if len(class_indices) == 0:
        return (0, None, None)

    best_run = None
    run_start = previous = class_indices[0]
    run_length = 1

    for current in class_indices[1:]:
        if current == previous + 1:
            run_length += 1
        else:
            # The run ended at `previous`; keep it only if strictly longer,
            # so that the earliest run wins on ties.
            if best_run is None or run_length > best_run[0]:
                best_run = (run_length, run_start, previous)
            run_start = current
            run_length = 1
        previous = current

    # Account for the last run, which ends at the last matching index.
    if best_run is None or run_length > best_run[0]:
        best_run = (run_length, run_start, previous)

    return best_run

def display_dataset(df):
    """
    Return a view of ``df`` for display: all rows, every column except the
    last two.

    :param df: DataFrame to show.
    :return: DataFrame without its final two columns.
    """
    every_row = slice(None)
    all_but_last_two = slice(None, -2)
    return df.iloc[every_row, all_but_last_two]

In [None]:
# Folder that contains the data files; it becomes the working directory, so
# the file names entered in the cells below are resolved relative to it.
# Surrounding whitespace and quotes (common when a path is pasted) are removed.
folder_path = input("Enter folder path : ").strip().strip('"\'')
if folder_path:
    os.chdir(os.path.expanduser(folder_path))
# An empty answer keeps the current working directory.

# Enter File Name Here

In [None]:
# Raw CSV file to process; process_dataset's docstring asks for the name
# including the .csv extension.
file_name = input("Enter file name : ")

In [None]:
# Classify the raw file; process_dataset writes "<name>_processed_sheet.csv".
process_dataset(file_name)

# `df` is the path of that output file WITHOUT the ".csv" extension (a string,
# not a DataFrame): processed_file_analysis in the next cell appends ".csv"
# to its argument before reading it.
df = file_name.rsplit('.', 1)[0] + '_processed_sheet'

In [None]:
# Load the processed sheet and add the 'Milliseconds' and 'Time Diff' columns.
# NOTE(review): processed_file_analysis appends '.csv' to its argument, so
# `df` must be a file-name string (without extension) at this point.
df_processed = processed_file_analysis(df)
# Show the frame without its last two columns.
display_dataset(df_processed)

# Analysis

In [None]:
# Per-class duration table (row count / 100) and the total of those durations.
dataset_description(df_processed)

In [None]:
# Class label to inspect; it is compared against the 'Overall class' column.
class_name = input("Enter Class name : ")

In [None]:
# Longest consecutive run of the chosen class, returned as a
# (row count, start index, end index) tuple.
overall_class_stats(df_processed,class_name)

# Graphs

In [None]:
# Bar chart of the duration of each 'Overall class' value.
create_plot(df_processed)

In [None]:
# Class label whose duration buckets are plotted in the next cell.
position_name = input("Enter Class name : ")

In [None]:
# Bar chart of the chosen class's rows counted per running-duration bucket.
plot_bins(df_processed,position_name)