In [None]:
import pandas as pd

# Read the CSV file into a DataFrame; replace_minus_one below reads and rewrites its 'hr' column
df = pd.read_csv("newhr.csv")

# Function to replace -1 values with 10% below the next non-negative value
def replace_minus_one(df):
    """Fill runs of the ``-1`` placeholder in ``df['hr']`` from neighbouring readings.

    Every run of consecutive ``-1`` entries receives one shared value:

    * 90% of the first non-placeholder value that follows the run, if any;
    * otherwise (the run reaches the end of the frame) 110% of the last
      value before the run.

    A column made up only of ``-1`` is left as it is, because there is no
    reading to derive a replacement from.

    Rows are addressed by position, so the frame may carry any index, and
    each row is visited once regardless of how long the runs are.

    Args:
        df: DataFrame with a numeric ``hr`` column. Modified in place; the
            column becomes float when at least one placeholder is present.

    Returns:
        The same DataFrame object that was passed in.
    """
    hr = df['hr'].to_numpy(dtype=float, copy=True)
    placeholder = hr == -1
    if not placeholder.any():
        # Nothing to fill: leave the column (and its dtype) untouched.
        return df

    total = len(hr)
    pos = 0
    while pos < total:
        if not placeholder[pos]:
            pos += 1
            continue

        # Locate the end of this run using the ORIGINAL placeholder mask, so
        # values written for earlier runs are never mistaken for readings
        # that still need filling.
        run_end = pos
        while run_end < total and placeholder[run_end]:
            run_end += 1

        if run_end < total:
            # 10% below the next real reading
            hr[pos:run_end] = hr[run_end] * 0.9
        elif pos > 0:
            # Trailing run: 10% above the last real reading, applied to the
            # whole run rather than compounding from one row to the next.
            hr[pos:run_end] = hr[pos - 1] * 1.1
        pos = run_end

    df['hr'] = hr
    return df

# Fill the -1 placeholders in 'hr' (the function edits the frame and returns it)
df = replace_minus_one(df)

# Saving is currently disabled; re-enable the next line to write the filled data to disk
#df.to_csv('updated_dataset.csv', index=False)
df.head()

In [None]:
import pandas as pd

# Read the CSV file into a DataFrame; the helpers below work on its 'hr' column
df = pd.read_csv("updated_base_dataset_with_distance.csv")

# Function to compare HR values from row r and row r+1 and adjust if necessary
def compare_and_adjust_hr(row, next_row):
    """Pair a row's heart rate with the following row's, nudging exact ties.

    Args:
        row: Record exposing an ``'hr'`` entry.
        next_row: The record that follows ``row``; also read through ``'hr'``.

    Returns:
        A two-element ``pd.Series``: ``row``'s value at position 0 and
        ``next_row``'s value at position 1. The second value is multiplied
        by 1.05 when the two readings compare equal.
    """
    current_hr, following_hr = row['hr'], next_row['hr']

    # NaN never compares equal to anything, so a missing successor passes
    # through unchanged.
    if current_hr == following_hr:
        following_hr = following_hr * 1.05

    return pd.Series([current_hr, following_hr])

# Add a new column with HR values from row r and row r+1
def adjust_hr(df):
    """Add ``hr_r`` / ``hr_r_plus_1`` columns pairing each row with its successor.

    ``hr_r`` is the row's own ``hr`` value and ``hr_r_plus_1`` is the ``hr``
    of the row below it. When the two are equal the successor value is
    raised by 5% — the same rule ``compare_and_adjust_hr`` applies to a
    single pair. The last row has no successor, so its ``hr_r_plus_1`` is NaN.

    The successor column is built with one vectorised shift instead of
    re-shifting the whole frame inside a per-row callback, which made the
    work grow quadratically with the number of rows.

    Args:
        df: DataFrame with a numeric ``hr`` column. Modified in place.

    Returns:
        The same DataFrame, now carrying the two extra float columns.
    """
    current_hr = df['hr'].astype(float)
    next_hr = current_hr.shift(-1)

    # eq() is False wherever either side is NaN, so the trailing NaN stays NaN.
    tied = current_hr.eq(next_hr)

    df['hr_r'] = current_hr
    df['hr_r_plus_1'] = next_hr.where(~tied, next_hr * 1.05)
    return df

# Apply the adjustment function (adds the 'hr_r' and 'hr_r_plus_1' columns)
df = adjust_hr(df)

# Save the DataFrame to a new CSV file
df.to_csv('adjusted_hr.csv', index=False)

# Preview the first 30 rows as the cell output
df.head(30)


In [None]:
import numpy as np
from scipy.interpolate import CubicSpline

# Load adjusted_hr.csv; the steps below use its 'hr_r' and 'hr_r_plus_1' columns
df = pd.read_csv("adjusted_hr.csv")


# Define a function to interpolate values using cubic spline
def interpolate_cubic_spline(x, y):
    """Sample a spline running from ``x`` to ``y`` at 125 evenly spaced points.

    The spline is fitted through exactly two knots: value ``x`` at position 0
    and value ``y`` at position 1.

    Args:
        x: Value at the start of the interval.
        y: Value at the end of the interval.

    Returns:
        NumPy array of 125 samples; the first corresponds to position 0 and
        the last to position 1.
    """
    sample_positions = np.linspace(0, 1, 125)
    return CubicSpline([0, 1], [x, y])(sample_positions)

# Clean the data: turn +/-inf into NaN, then drop rows missing either endpoint
df = df.replace([np.inf, -np.inf], np.nan).dropna(subset=['hr_r', 'hr_r_plus_1'])

# Store each row's 125 interpolated samples as a list literal in 'test'.
# ndarray.tolist() yields plain Python floats, so the text stays
# "[80.0, 80.08, ...]" under NumPy >= 2 as well, where str(list(array))
# would render every element as "np.float64(80.0)".
df['test'] = df.apply(
    lambda row: str(interpolate_cubic_spline(row['hr_r'], row['hr_r_plus_1']).tolist()),
    axis=1,
)

df.to_csv('test12345.csv', index=False)
df.head()

In [None]:
import pandas as pd
import numpy as np
from scipy.interpolate import CubicSpline

# Read the CSV file into a DataFrame; interpolate_values below reads its 'hr_combined' column
df = pd.read_csv("adjusted_hr.csv")

# Function to interpolate values for each row
def interpolate_values(row):
    """Turn a row's ``hr_combined`` text into 125 interpolated samples.

    ``hr_combined`` is read as a bracketed, comma-separated list of numbers.
    Whenever a value equals the one before it, it is raised by 5% (the check
    then continues with the raised value). A spline through the first and
    last values is sampled at 125 evenly spaced points.

    Args:
        row: Record with an ``hr_combined`` string entry; ``row.name`` is
            used only in the error message.

    Returns:
        The samples as text of the form ``"[v1,v2,...]"``, or ``np.nan`` when
        any parsed value is non-finite or processing raises (the error is
        printed, not re-raised).
    """
    try:
        # Drop the surrounding brackets, then parse every token as a float
        inner_text = row['hr_combined'][1:-1]
        hr_values = list(map(float, inner_text.split(',')))

        # Refuse rows containing NaN or +/-inf
        if not all(np.isfinite(hr_values)):
            return np.nan

        # Break exact ties between neighbours by lifting the later value 5%
        for later in range(1, len(hr_values)):
            if hr_values[later - 1] == hr_values[later]:
                hr_values[later] *= 1.05

        # Only the two endpoints feed the spline
        endpoints = [hr_values[0], hr_values[-1]]
        samples = CubicSpline([0, 1], endpoints)(np.linspace(0, 1, 125))

        return '[' + ','.join(str(sample) for sample in samples) + ']'
    except Exception as e:
        print(f"Error processing row {row.name}: {e}")
        return np.nan

# Apply the function to each row and store the result in a new column
df['interpolated_values'] = df.apply(interpolate_values, axis=1)

# Keep a copy of the source column next to the result for side-by-side checks.
# NOTE(review): this copies the original 'hr_combined' text; the 5% bumps made
# inside interpolate_values happen on a local list and never reach this
# column. Confirm that an unmodified copy is what is wanted here.
df['hr_combined_modified'] = df['hr_combined']

# Save the DataFrame to a new CSV file
df.to_csv('sds1.csv', index=False)

# Preview last: a notebook cell only renders its final expression, so the
# preview has to come after the write to be shown.
df.head(30)


In [None]:
import pandas as pd
import numpy as np
from scipy.interpolate import CubicSpline

# Read the CSV file into a DataFrame; interpolate_values below reads its 'hr_combined' column
df = pd.read_csv("updated_base_dataset_with_distance.csv")

# Function to interpolate values for each row
def interpolate_values(row):
    """Expand a row's ``hr_combined`` text according to how varied it is.

    ``hr_combined`` is read as a bracketed, comma-separated list of numbers.
    The output depends on the number of distinct values:

    * one distinct value  -> that value repeated 125 times;
    * more than two       -> a spline through the first and last values,
                             sampled at 125 evenly spaced points;
    * exactly two         -> the parsed values returned as they are.

    NOTE(review): the two-distinct-values case is not resampled, so its
    output length is whatever the input length was, not 125 — confirm this
    is intended.

    Args:
        row: Record with an ``hr_combined`` string entry; ``row.name`` is
            used only in the error message.

    Returns:
        Text of the form ``"[v1,v2,...]"``, or ``np.nan`` when processing
        raises (the error is printed, not re-raised).
    """
    try:
        hr_values = list(map(float, row['hr_combined'][1:-1].split(',')))
        distinct_count = len(set(hr_values))

        if distinct_count == 1:
            # Constant input: no spline needed, just repeat the value
            pieces = [str(hr_values[0])] * 125
        elif distinct_count > 2:
            spline = CubicSpline([0, 1], [hr_values[0], hr_values[-1]])
            pieces = [str(sample) for sample in spline(np.linspace(0, 1, 125))]
        else:
            # Two distinct values: pass the parsed list through unchanged
            pieces = [str(value) for value in hr_values]

        return '[' + ','.join(pieces) + ']'
    except Exception as e:
        print(f"Error processing row {row.name}: {e}")
        return np.nan

# Apply the function to each row and store the result in a new column
# (rows that failed to parse hold NaN; their errors were printed by the function)
df['interpolated_values'] = df.apply(interpolate_values, axis=1)

# Save the DataFrame to a new CSV file
df.to_csv('updated_base_dataset_with_distance_Test.csv', index=False)


In [None]:
import numpy as np
import pandas as pd
# Load the dataset; calculate_overall_distance below reads its 'rawData' column
df = pd.read_csv("orderd_base_dataset.csv")

# Function to calculate overall distance traveled for each row
def calculate_overall_distance(row):
    """Double-integrate a row's acceleration samples into one figure.

    ``rawData`` is read as a bracketed, comma-separated list of numbers,
    scaled by 9.81 / 1000 (milli-g to m/s^2), and cumulatively summed twice
    with a step of 1. The final element of the second cumulative sum is
    returned, i.e. the signed end position starting from rest, not a path
    length.

    Args:
        row: Record with a ``rawData`` entry (string or null).

    Returns:
        The final value of the double cumulative sum, or ``np.nan`` when
        ``rawData`` is null.
    """
    raw = row['rawData']
    if pd.isnull(raw):
        return np.nan

    time_interval = 1  # Assuming time interval is 1 second

    # Parse the samples and convert milli-g to m/s^2 (1 g = 9.81 m/s^2)
    samples = np.array(raw.strip('[]').split(',')).astype(float)
    acceleration_mps2 = samples * 9.81 / 1000

    # First running sum gives velocity, second gives displacement
    velocity = (acceleration_mps2 * time_interval).cumsum()
    displacement = (velocity * time_interval).cumsum()

    return displacement[-1]

# Apply the function to calculate overall distance for each row
# (rows with a null 'rawData' receive NaN)
df['overall_distance'] = df.apply(calculate_overall_distance, axis=1)

# Print the DataFrame, including the new 'overall_distance' column, as plain text
print(df)
# Write the DataFrame with the new 'overall_distance' column to a CSV file
df.to_csv('updated_base_dataset_with_distance_taversed.csv', index=False)


In [None]:
import pandas as pd

# Load the base dataset
df = pd.read_csv("NEW_BASE_DATASET.csv")

# Row-wise total of the columns from 'r1' through 'r125' (label slice, both ends included)
df['overall_distance'] = df.loc[:, 'r1':'r125'].sum(axis=1)

# Move 'overall_distance' to the position 'r1' currently occupies
cols = list(df.columns)
r1_position = cols.index('r1')
cols.insert(r1_position, cols.pop(cols.index('overall_distance')))

# Apply the new column order and sort the rows by 'Id'
df = df[cols].sort_values(by='Id')

# Write the result to a new CSV file
df.to_csv("NEW_BASE_DATASET_WITH_DISTANCE.csv", index=False)

In [None]:
import numpy as np

# Set the seed for reproducibility
np.random.seed(0)

# Generate 125 random decimal values drawn uniformly between 73.25459677 and 73.35459677
random_values = np.random.uniform( 73.25459677, 73.35459677, 125)

# Print the generated random values
print(random_values)

In [None]:
import numpy as np

# Every generated value is this centre plus a small offset
central_value = 72.09

# Shape of the generated grid
num_rows = 9
num_columns = 125

# Evenly spaced offsets spanning [-0.205, 0.205]; every row uses this same set
perturbations = np.linspace(-0.205, 0.205, num_columns)

# Start from a constant grid, then add an independently shuffled copy of the
# offsets to each row
initial_array = np.full((num_rows, num_columns), central_value, dtype=float)
for i in range(num_rows):
    initial_array[i, :] = initial_array[i, :] + np.random.permutation(perturbations)

# One tab-separated text line per row
rows_as_strings = ['\t'.join(str(cell) for cell in row) for row in initial_array]

# Stack the lines into a single newline-separated block and print it
data_for_excel = '\n'.join(rows_as_strings)
print(data_for_excel)


In [None]:
import pandas as pd

# Read the data from the CSV file into a DataFrame
df = pd.read_csv("NEW_BASE_DATASET.csv")

# List of columns to drop
columns_to_drop = ['hr1','hr_combined']

# Create a new DataFrame by dropping the specified columns (df itself is left unchanged)
new_df = df.drop(columns=columns_to_drop)

# Write the trimmed DataFrame to CSV, then preview its first rows as the cell output
new_df.to_csv('updated_dataset.csv', index=False)
new_df.head()

In [None]:
import pandas as pd

# Read the data from the CSV file into a DataFrame
df = pd.read_csv("updated_dataset.csv")

# Remove the 'label' column from the frame and keep its values
label_column = df.pop('label')

# Re-add it; a newly assigned column is appended at the far right
df['label'] = label_column

# Print the DataFrame with the 'label' column moved to the far right,
# then overwrite the file it was read from with the reordered columns
print(df)
df.to_csv('updated_dataset.csv', index=False)


In [None]:
# Reload the saved CSV and show its row count as the cell output
df = pd.read_csv('updated_dataset.csv')
len(df)

In [None]:
import pandas as pd

# eventIDs whose rows are excluded from the output file
eventIDs_to_remove = [
    764, 1046, 5721, 5745, 5891, 6587, 6590, 6808, 7006, 7434,
    8998, 9005, 12206, 14101, 15208, 15230, 15417, 17219, 21603,
    21855, 21867, 26071, 26987, 27786, 28734, 31397, 31402, 31404,
    36799, 36812, 44115, 45208, 45795, 45800, 47173,
]

# Load the dataset to filter
df = pd.read_csv('updated_dataset.csv')

# Keep only rows whose eventID is not listed, ordered by ascending eventID
df = (
    df.loc[~df['eventID'].isin(eventIDs_to_remove)]
      .sort_values(by='eventID', ascending=True)
)

df.to_csv('newdataset11.csv', index=False)


In [None]:
import pandas as pd
import numpy as np
import ast

# Load the DataFrame; calculate_overall_distance below reads its 'rawData' column
df = pd.read_csv("formatted_base_dataset.csv")

# Function to calculate overall distance traveled for each row
def calculate_overall_distance(row):
    """Reduce a row's ``rawData`` acceleration samples to one displacement value.

    The bracketed, comma-separated ``rawData`` text is parsed to floats,
    multiplied by 9.81 / 1000 (milli-g to m/s^2), and run through two
    successive cumulative sums with a step of 1. The result is the last
    entry of the second sum: a signed end position, not a path length.

    Args:
        row: Record with a ``rawData`` entry (string or null).

    Returns:
        Last entry of the double cumulative sum, or ``np.nan`` if
        ``rawData`` is null.
    """
    if not pd.isnull(row['rawData']):
        dt = 1  # Assuming time interval is 1 second

        tokens = row['rawData'].strip('[]').split(',')
        # 1 g = 9.81 m/s^2, and the samples are taken to be in milli-g
        accel = np.array(tokens).astype(float) * 9.81 / 1000

        speed = np.cumsum(accel * dt)
        position = np.cumsum(speed * dt)
        return position[-1]

    return np.nan

# Apply the function to calculate overall distance for each row
# (rows with a null 'rawData' receive NaN)
df['overall_distance'] = df.apply(calculate_overall_distance, axis=1)

# Write the result, then preview it. The preview comes last because a
# notebook cell only renders its final expression.
df.to_csv('updated_base_dataset_with_distance.csv', index=False)
df.head()


In [None]:
import pandas as pd

# Read the dataset that supplies the 'eventID' column
df_updated_dataset = pd.read_csv("newdataset.csv")

# The labels file is read without a header row, so its column is named explicitly
column_names = ['transposed_labels']
df_transposed_labels = pd.read_csv("transposed_labels.csv", header=None, names=column_names)

# Sanity-check what was loaded
print(df_transposed_labels.head())
print(df_transposed_labels.columns)

# Pair every label with an eventID. `names=column_names` above guarantees the
# 'transposed_labels' column exists, so no existence check is needed.
# The two columns are aligned on the row index; if the files differ in
# length, the shorter side is padded with NaN.
df1 = pd.DataFrame({
    'transposed_labels': df_transposed_labels['transposed_labels'],
    'newdataset': df_updated_dataset['eventID']
})

# Print the new DataFrame 'df1'
print(df1)

In [None]:
# Write the label/eventID pairing, then preview it. The preview comes last
# because a notebook cell only renders its final expression.
df1.to_csv('newlabels1.csv', index=False)
df1.head()

In [11]:
import pandas as pd

# Load the DataFrame
df = pd.read_csv("NEW_BASE_DATASET_WITH_DISTANCE.csv")

# Repeat each row 125 times, then renumber so the index is continuous
df_repeated = df.reindex(df.index.repeat(125)).reset_index(drop=True)

# Keep only the identifier and heart-rate columns. Selecting with a list
# already returns a new frame, so wrapping it in a one-element pd.concat
# added nothing.
result_df = df_repeated[['Id', 'eventID', 'hr']]

result_df.to_csv('hr_exp.csv', index=False)

# Show the row count; it is the cell's last expression so that it renders
len(result_df)

In [19]:
import pandas as pd
import numpy as np
from scipy.interpolate import CubicSpline

# Load the DataFrame
df = pd.read_csv("hr_exp.csv")

# Number the rows in consecutive blocks of 125: rows 0-124 get 0, rows 125-249 get 1, and so on
df['group_id'] = np.arange(len(df)) // 125

# Define a function to perform cubic spline interpolation
def cubic_spline_interpolation(values):
    """Resample a sequence to 125 points along a cubic spline.

    The values are placed at integer positions ``0 .. len(values) - 1``, a
    cubic spline is fitted through them, and the spline is evaluated at 125
    evenly spaced positions covering that same range.

    Args:
        values: Sequence of numbers to fit.

    Returns:
        NumPy array of 125 samples.
    """
    last_position = len(values) - 1
    positions = np.arange(last_position + 1)
    return CubicSpline(positions, values)(np.linspace(0, last_position, 125))

# Apply cubic spline interpolation within each group; every group contributes
# 125 samples to the flat list, in group_id order
interpolated_values_list = []
for group_id, group_df in df.groupby('group_id'):
    values = group_df['hr']
    interpolated_values = cubic_spline_interpolation(values)
    interpolated_values_list.extend(interpolated_values)

# Create a new column with interpolated values transposed downwards.
# reshape(num_rows, 125) puts one group per matrix row; transpose().ravel()
# then reads the matrix column by column, i.e. sample 0 of every group,
# then sample 1 of every group, and so on.
# NOTE(review): as a result df row k receives sample k // num_rows of group
# k % num_rows, not a value from its own group. Confirm this interleaving is
# intended before relying on 'interpolated_hr' row by row.
num_rows = len(interpolated_values_list) // 125
new_column_values = np.array(interpolated_values_list).reshape(num_rows, 125).transpose().ravel()
rounded_values = np.round(new_column_values, 4)  # Round to four decimal places
df['interpolated_hr'] = rounded_values[:len(df)]

# Drop the temporary 'group_id' column
df.drop(columns=['group_id'], inplace=True)

# Save the resulting DataFrame, then preview its first rows as the cell output
df.to_csv('hr_exp1.csv', index=False)
df.head()

Unnamed: 0,Id,eventID,hr,interpolated_hr
0,1,115,80.1,80.1
1,1,115,80.1,80.1
2,1,115,80.1,89.0
3,1,115,80.1,87.0
4,1,115,80.1,90.0


In [33]:
import pandas as pd
import numpy as np
from scipy.interpolate import CubicSpline

# Load the DataFrame
df = pd.read_csv("hr_exp.csv")  # Replace "hr_exp.csv" with the actual dataset filename

# Add zero-mean Gaussian noise to every 'hr' value (overwrites the column in memory).
# No seed is set in this cell, so the noise differs between runs unless the
# global NumPy RNG was seeded earlier in the session.
noise_level = 0.01  # Standard deviation of the noise; adjust as needed
df['hr'] += np.random.normal(loc=0, scale=noise_level, size=len(df))

# Define a function to perform cubic spline interpolation with the next value
def cubic_spline_interpolation(values):
    """Join each consecutive pair of values with a clamped cubic segment.

    For every neighbouring pair ``(values[i], values[i + 1])`` a two-knot
    spline with ``bc_type='clamped'`` is built over the interval [0, 125]
    and evaluated at 125 evenly spaced positions. The last sample of every
    segment is dropped so the next segment's first sample is not duplicated.

    Args:
        values: Indexable sequence of numbers.

    Returns:
        A flat list with 124 samples per pair, i.e. ``124 * (len(values) - 1)``
        entries; empty when fewer than two values are supplied.
    """
    knots = np.array([0, 125])                # x-values shared by every segment
    sample_positions = np.linspace(0, 125, 125)

    stitched = []
    for left in range(len(values) - 1):
        endpoints = np.array([values[left], values[left + 1]])
        segment = CubicSpline(knots, endpoints, bc_type='clamped')(sample_positions)
        # Leave out the final sample; it coincides with the next segment's start
        stitched.extend(segment[:-1])
    return stitched

# Apply cubic spline interpolation to each eventID group and collect all
# samples in one flat list (groupby yields the groups in sorted eventID order)
interpolated_values_list = []
for group_id, group_df in df.groupby('eventID'):
    values = group_df['hr'].to_numpy()
    interpolated_values = cubic_spline_interpolation(values)
    interpolated_values_list.extend(interpolated_values)

# Add the interpolated values to the DataFrame.
# NOTE(review): cubic_spline_interpolation returns 124 samples per consecutive
# pair, so a group of n rows yields 124 * (n - 1) values and the flat list is
# far longer than df. Slicing to len(df) keeps only the leading samples, so
# most rows receive a value that belongs to a different row or group. The
# iteration order (sorted eventID) may also differ from the row order.
# Confirm the intended alignment before using 'interpolated_hr'.
df['interpolated_hr'] = interpolated_values_list[:len(df)]

# Print the resulting DataFrame
print(df)


          Id  eventID         hr  interpolated_hr
0          1      115  80.103221        80.103221
1          1      115  80.100294        80.103220
2          1      115  80.100048        80.103219
3          1      115  80.091512        80.103216
4          1      115  80.077638        80.103212
...      ...      ...        ...              ...
366120  3919    53665  82.006581       114.007734
366121  3919    53665  82.000510       114.007887
366122  3919    53665  81.998240       114.008039
366123  3919    53665  81.988063       114.008190
366124  3919    53665  81.979150       114.008341

[366125 rows x 4 columns]


In [34]:
# Write the current frame to df.csv without the index column
df.to_csv('df.csv', index=False)

In [39]:
import pandas as pd

# Reload the frame stored in df.csv
transposed_df = pd.read_csv("df.csv")  # Replace "your_dataset.csv" with the actual dataset filename

df = pd.DataFrame(transposed_df)

# Take the 'hr' column as a flat array
# (note: this is the 'hr' column, not 'interpolated_hr')
values = df['hr'].values

# Number of complete 125-value rows; any leftover values at the end are dropped
num_rows = len(values) // 125
reshaped_values = values[:num_rows * 125].reshape(num_rows, 125)

# Wide layout: one row per block of 125 readings, columns h1 .. h125
transposed_df = pd.DataFrame(reshaped_values, columns=[f'h{i}' for i in range(1, 126)])

# Print the resulting DataFrame and save it
print(transposed_df)
transposed_df.to_csv('hr_expe.csv', index=False)

              h1          h2          h3          h4          h5          h6  \
0      80.103221   80.100294   80.100048   80.091512   80.077638   80.096212   
1      80.090861   80.099847   80.102149   80.092155   80.099536   80.085395   
2      89.003026   88.999340   89.003488   89.012746   88.991172   88.993071   
3      87.000118   87.012756   87.009337   86.994985   87.004912   87.021346   
4      89.990857   90.004698   89.994394   90.004402   90.019298   89.997031   
...          ...         ...         ...         ...         ...         ...   
2924  111.997942  111.984436  112.002916  111.999767  112.005866  111.996924   
2925  123.213757  123.200535  123.183103  123.193626  123.201247  123.202059   
2926  135.506463  135.529051  135.525471  135.493653  135.513697  135.557736   
2927   74.021493   74.011848   74.001949   73.998465   74.013226   73.997381   
2928   82.000069   82.014780   81.985840   82.006693   82.015424   82.001053   

              h7          h8          h