In [13]:
import pandas as pd
import numpy as np

# Build a Frame -> Timestamp table for meeting 1 by linear interpolation
# between two manually identified sync points.
#
# The frame count comes from the distinct Frame values in the MediaPipe
# output; frames are numbered 0 .. noOfFrames - 1.
output = pd.read_csv('mapje/meeting_1/7_mediapipe.csv')
noOfFrames = output['Frame'].nunique()

# Known (frame, wall-clock time) pairs used to derive the frame duration.
first_known_frame, first_known_time = 0, pd.to_datetime('14:04:50:0000', format='%H:%M:%S:%f')
last_known_frame, last_known_time = 33225, pd.to_datetime('14:26:59:0000', format='%H:%M:%S:%f')

# Average duration of one frame between the two sync points.
time_diff = last_known_time - first_known_time
frame_diff = last_known_frame - first_known_frame
avg_time_per_frame = time_diff / frame_diff

# Every frame's timestamp follows directly from its distance to the first
# sync point. This replaces a per-frame loop that re-scanned the whole table
# on each step (quadratic) and yields the same values, since integer
# multiples of a Timedelta are exact.
df = pd.DataFrame({'Frame': range(noOfFrames)})
df['Timestamp'] = first_known_time + (df['Frame'] - first_known_frame) * avg_time_per_frame

# Render as 'HH:MM:SS:mmm' (microseconds truncated to milliseconds).
df['Timestamp'] = df['Timestamp'].dt.strftime('%H:%M:%S:%f').str[:-3]



In [9]:
import pandas as pd
import numpy as np

def convert_whole_number_columns_to_int(df):
    """Cast numeric columns that hold only whole numbers to 64-bit integers.

    The frame is modified in place and the same object is returned.

    A column is converted when it has a numeric dtype and every value equals
    its rounded value. Columns containing NaN never satisfy that check and
    are left untouched. Float columns containing +/-inf are skipped as well:
    inf equals its own rounding, so without the guard the integer cast would
    raise.

    Args:
        df: DataFrame whose columns should be inspected.

    Returns:
        The same DataFrame, with qualifying columns cast to ``int64``.
    """
    for col in df.columns:
        column = df[col]
        if not pd.api.types.is_numeric_dtype(column):
            continue
        # inf == round(inf), but it cannot be represented as an integer.
        if pd.api.types.is_float_dtype(column) and not np.isfinite(column).all():
            continue
        if column.eq(column.round()).all():
            # Explicit width: plain `int` is platform-dependent on older NumPy.
            df[col] = column.astype('int64')
    return df

def timeInterpolation(firstKnownFrame, lastKnownFrame, timestamp1, timestamp2, meetingNumber):
    """Assign a wall-clock timestamp to every frame of a meeting.

    The frame count is the number of distinct ``Frame`` values in
    ``mapje/meeting_{meetingNumber}/9_temporaled.csv``; frames are numbered
    ``0 .. noOfFrames - 1``. The average frame duration is derived from the
    two known (frame, timestamp) pairs, and every frame's timestamp is
    computed from its distance to ``firstKnownFrame``. Frames before the
    first known frame and after the last known frame are extrapolated with
    the same frame duration.

    Example input: ``(20, 31927, '14:04:50:0000', '14:26:59:0400', 5)``.

    Args:
        firstKnownFrame: Frame number whose time is ``timestamp1``.
        lastKnownFrame: Frame number whose time is ``timestamp2``.
        timestamp1: Time of ``firstKnownFrame`` as ``'HH:MM:SS:ffff'``.
        timestamp2: Time of ``lastKnownFrame`` in the same format.
        meetingNumber: Selects the ``mapje/meeting_{meetingNumber}`` folder.

    Returns:
        DataFrame with columns ``Frame`` (int) and ``Timestamp`` (string
        ``'HH:MM:SS:mmm'``, microseconds truncated to milliseconds).

    Raises:
        ValueError: If both known frames are the same (no frame duration can
            be derived) or a timestamp does not match the expected format.
    """
    time_format = '%H:%M:%S:%f'

    output = pd.read_csv(f'mapje/meeting_{meetingNumber}/9_temporaled.csv')
    noOfFrames = output['Frame'].nunique()

    if firstKnownFrame == lastKnownFrame:
        raise ValueError('firstKnownFrame and lastKnownFrame must differ to derive a frame duration')

    # Parse the sync points directly; they do not need to exist as rows.
    start_time = pd.to_datetime(timestamp1, format=time_format)
    end_time = pd.to_datetime(timestamp2, format=time_format)

    # Average duration of a single frame between the two sync points.
    avg_time_per_frame = (end_time - start_time) / (lastKnownFrame - firstKnownFrame)

    # Closed form instead of walking frame by frame: the offset from the
    # first known frame may be negative, so earlier frames get timestamps too.
    df = pd.DataFrame({'Frame': range(noOfFrames)})
    df['Timestamp'] = start_time + (df['Frame'] - firstKnownFrame) * avg_time_per_frame

    # Convert the timestamps back to their textual format.
    df['Timestamp'] = df['Timestamp'].dt.strftime(time_format).str[:-3]
    return df

def createLabels(originalDF,meetingNumber=0):
    """Write one labelled frame/timestamp CSV per person of a meeting.

    Reads ``Meetings/meeting_{meetingNumber}/excelChat.xlsx``. Every column
    except ``timestamp`` is treated as one person. For each person a copy of
    ``originalDF`` gets a ``Label`` column initialised to ``'x'``. For every
    spreadsheet row, the last frame whose timestamp is not later than the
    row's timestamp is located, and the rows from 500 before that frame
    through one after it receive the person's value from that spreadsheet
    row. The result is saved as
    ``mapje/meeting_{meetingNumber}/timestamped_{person}.csv`` and the
    person's name is printed.

    Args:
        originalDF: Frame with ``Frame`` and ``Timestamp`` ('HH:MM:SS:fff')
            columns; it is not modified.
        meetingNumber: Selects the input workbook and output folder.
    """
    chat_labels = pd.read_excel(f"Meetings/meeting_{meetingNumber}/excelChat.xlsx")
    person_columns = [name for name in chat_labels.columns if name != 'timestamp']

    for person in person_columns:
        labeled = originalDF.copy(deep=True)
        labeled['Timestamp'] = pd.to_datetime(labeled['Timestamp'], format='%H:%M:%S:%f')
        labeled['Label'] = 'x'

        for _, chat_row in chat_labels.iterrows():
            moment = pd.to_datetime(chat_row['timestamp'], format='%H:%M:%S.%f')

            # Last frame at or before the chat entry (round down).
            frames_so_far = labeled.loc[labeled['Timestamp'] <= moment, 'Frame']
            anchor_frame = frames_so_far.iloc[-1]

            # NOTE(review): .loc slices by index label and includes both ends,
            # so the frame after the anchor is labelled too; this also assumes
            # the index equals the frame number. Confirm that is intended.
            window_start = max(0, anchor_frame - 500)
            window_end = anchor_frame + 1
            labeled.loc[window_start:window_end, 'Label'] = chat_row[person]

        labeled['Timestamp'] = labeled['Timestamp'].apply(
            lambda stamp: np.nan if pd.isna(stamp) else stamp.strftime('%H:%M:%S:%f')[:-3]
        )

        print(person)
        labeled.to_csv(f'mapje/meeting_{meetingNumber}/timestamped_{person}.csv', index=False)


In [11]:
# Sync points per meeting, in timeInterpolation's argument order:
# (firstKnownFrame, lastKnownFrame, timestamp1, timestamp2).
MEETING_SYNC_POINTS = {
    1: (0, 33225, '14:04:50:0000', '14:26:59:0000'),
    2: (0, 28451, '11:04:16:9600', '11:23:15:0000'),
    3: (0, 30560, '11:04:44:0000', '11:25:10:0000'),
    4: (0, 41074, '11:06:27:0400', '11:33:50:0000'),
    5: (0, 31125, '11:06:31:0000', '11:27:16:0000'),
}

# Change this number to process a different meeting.
meetingNumber = 5
df = timeInterpolation(*MEETING_SYNC_POINTS[meetingNumber], meetingNumber)

createLabels(df, meetingNumber)

Elena
Rockey
Michalina
Jeroen


In [1]:
import pandas as pd
import numpy as np
def convert_whole_number_columns_to_int(df):
    """Cast numeric columns that hold only whole numbers to 64-bit integers.

    The frame is modified in place and the same object is returned.

    A column is converted when it has a numeric dtype and every value equals
    its rounded value. Columns containing NaN never satisfy that check and
    are left untouched. Float columns containing +/-inf are skipped as well:
    inf equals its own rounding, so without the guard the integer cast would
    raise.

    Args:
        df: DataFrame whose columns should be inspected.

    Returns:
        The same DataFrame, with qualifying columns cast to ``int64``.
    """
    for col in df.columns:
        column = df[col]
        if not pd.api.types.is_numeric_dtype(column):
            continue
        # inf == round(inf), but it cannot be represented as an integer.
        if pd.api.types.is_float_dtype(column) and not np.isfinite(column).all():
            continue
        if column.eq(column.round()).all():
            # Explicit width: plain `int` is platform-dependent on older NumPy.
            df[col] = column.astype('int64')
    return df
# Meeting 1: attach each person's Label/Timestamp to the per-frame feature table.
outputDF = convert_whole_number_columns_to_int(pd.read_csv('mapje/meeting_1/9_temporaled.csv'))
outputDF = outputDF.assign(Label=np.nan, Timestamp=np.nan)

# Per-person label tables, tagged with the Person id they are merged on.
bobDF = pd.read_csv('mapje/meeting_1/timestamped_Bob.csv').assign(Person=1)
ioanaDF = pd.read_csv('mapje/meeting_1/timestamped_Ioana.csv').assign(Person=2)
paulDF = pd.read_csv('mapje/meeting_1/timestamped_Paul.csv').assign(Person=3)

tagged_frames = [('bob', bobDF), ('ioana', ioanaDF), ('paul', paulDF)]
tags = [tag for tag, _ in tagged_frames]

# Left-merge on (Frame, Person); each person's columns get a '_<tag>' suffix.
for tag, person_labels in tagged_frames:
    outputDF = outputDF.merge(person_labels, on=['Frame', 'Person'], how='left', suffixes=('', f'_{tag}'))

# Collapse the suffixed columns: first non-null value in merge order,
# falling back to the (empty) base column.
for field in ('Label', 'Timestamp'):
    combined = outputDF[f'{field}_{tags[0]}']
    for tag in tags[1:]:
        combined = combined.combine_first(outputDF[f'{field}_{tag}'])
    outputDF[field] = combined.combine_first(outputDF[field])

# Remove the per-person helper columns.
outputDF = outputDF.drop(columns=[f'{field}_{tag}' for tag in tags for field in ('Label', 'Timestamp')])

# Print outputDF
# print(outputDF)


In [2]:
# Write one CSV per distinct Person id found in the merged meeting-1 table.
person_column = outputDF["Person"]
unique_person_ids = person_column.unique()

for person_id in unique_person_ids:
    person_rows = outputDF.loc[person_column == person_id]
    person_rows.to_csv(f'mapje/meeting_1/final_labeled_person_{person_id}.csv', index=False)

In [3]:
# Meeting 2: attach each person's Label/Timestamp to the per-frame feature table.
outputDF = convert_whole_number_columns_to_int(pd.read_csv('mapje/meeting_2/9_temporaled.csv'))
outputDF = outputDF.assign(Label=np.nan, Timestamp=np.nan)

# Per-person label tables, tagged with the Person id they are merged on.
elishaDF = pd.read_csv('mapje/meeting_2/timestamped_Elisha.csv').assign(Person=0)
dylanDF = pd.read_csv('mapje/meeting_2/timestamped_Dylan.csv').assign(Person=1)
keithDF = pd.read_csv('mapje/meeting_2/timestamped_Keith.csv').assign(Person=2)
robertoDF = pd.read_csv('mapje/meeting_2/timestamped_Roberto.csv').assign(Person=3)

tagged_frames = [('elisha', elishaDF), ('dylan', dylanDF), ('keith', keithDF), ('roberto', robertoDF)]
tags = [tag for tag, _ in tagged_frames]

# Left-merge on (Frame, Person); each person's columns get a '_<tag>' suffix.
for tag, person_labels in tagged_frames:
    outputDF = outputDF.merge(person_labels, on=['Frame', 'Person'], how='left', suffixes=('', f'_{tag}'))

# Collapse the suffixed columns: first non-null value in merge order,
# falling back to the (empty) base column.
for field in ('Label', 'Timestamp'):
    combined = outputDF[f'{field}_{tags[0]}']
    for tag in tags[1:]:
        combined = combined.combine_first(outputDF[f'{field}_{tag}'])
    outputDF[field] = combined.combine_first(outputDF[field])

# Remove the per-person helper columns.
outputDF = outputDF.drop(columns=[f'{field}_{tag}' for field in ('Label', 'Timestamp') for tag in tags])

# Print outputDF
#print(outputDF)

  outputDF['Label'] = np.nan
  outputDF['Timestamp'] = np.nan


In [4]:
# Write one CSV per distinct Person id found in the merged meeting-2 table.
person_column = outputDF["Person"]
unique_person_ids = person_column.unique()

for person_id in unique_person_ids:
    person_rows = outputDF.loc[person_column == person_id]
    person_rows.to_csv(f'mapje/meeting_2/final_labeled_person_{person_id}.csv', index=False)

In [None]:
import pandas as pd
import numpy as np
def convert_whole_number_columns_to_int(df):
    """Cast numeric columns that hold only whole numbers to 64-bit integers.

    The frame is modified in place and the same object is returned.

    A column is converted when it has a numeric dtype and every value equals
    its rounded value. Columns containing NaN never satisfy that check and
    are left untouched. Float columns containing +/-inf are skipped as well:
    inf equals its own rounding, so without the guard the integer cast would
    raise.

    Args:
        df: DataFrame whose columns should be inspected.

    Returns:
        The same DataFrame, with qualifying columns cast to ``int64``.
    """
    for col in df.columns:
        column = df[col]
        if not pd.api.types.is_numeric_dtype(column):
            continue
        # inf == round(inf), but it cannot be represented as an integer.
        if pd.api.types.is_float_dtype(column) and not np.isfinite(column).all():
            continue
        if column.eq(column.round()).all():
            # Explicit width: plain `int` is platform-dependent on older NumPy.
            df[col] = column.astype('int64')
    return df
# Meeting 3: attach each person's Label/Timestamp to the per-frame feature table.
outputDF = convert_whole_number_columns_to_int(pd.read_csv('mapje/meeting_3/9_temporaled.csv'))
outputDF = outputDF.assign(Label=np.nan, Timestamp=np.nan)

# Per-person label tables, tagged with the Person id they are merged on.
# NOTE(review): the ids skip 3 — presumably they mirror the Person ids in
# 9_temporaled.csv; confirm against that file.
saiwingDF = pd.read_csv('mapje/meeting_3/timestamped_Sai-Wing.csv').assign(Person=0)
davidDF = pd.read_csv('mapje/meeting_3/timestamped_David.csv').assign(Person=1)
danielDF = pd.read_csv('mapje/meeting_3/timestamped_Daniel.csv').assign(Person=2)
mathildeDF = pd.read_csv('mapje/meeting_3/timestamped_Mathilde.csv').assign(Person=4)
nicoleDF = pd.read_csv('mapje/meeting_3/timestamped_Nicole.csv').assign(Person=5)

tagged_frames = [
    ('saiwing', saiwingDF),
    ('david', davidDF),
    ('daniel', danielDF),
    ('mathilde', mathildeDF),
    ('nicole', nicoleDF),
]
tags = [tag for tag, _ in tagged_frames]

# Left-merge on (Frame, Person); each person's columns get a '_<tag>' suffix.
for tag, person_labels in tagged_frames:
    outputDF = outputDF.merge(person_labels, on=['Frame', 'Person'], how='left', suffixes=('', f'_{tag}'))

# Collapse the suffixed columns: first non-null value in merge order,
# falling back to the (empty) base column.
for field in ('Label', 'Timestamp'):
    combined = outputDF[f'{field}_{tags[0]}']
    for tag in tags[1:]:
        combined = combined.combine_first(outputDF[f'{field}_{tag}'])
    outputDF[field] = combined.combine_first(outputDF[field])

# Remove the per-person helper columns.
outputDF = outputDF.drop(columns=[f'{field}_{tag}' for field in ('Label', 'Timestamp') for tag in tags])

# Show the merged table.
print(outputDF)

In [3]:
# Write one CSV per distinct Person id found in the merged meeting-3 table.
person_column = outputDF["Person"]
unique_person_ids = person_column.unique()

for person_id in unique_person_ids:
    person_rows = outputDF.loc[person_column == person_id]
    person_rows.to_csv(f'mapje/meeting_3/final_labeled_person_{person_id}.csv', index=False)

In [None]:
import pandas as pd
import numpy as np
def convert_whole_number_columns_to_int(df):
    """Cast numeric columns that hold only whole numbers to 64-bit integers.

    The frame is modified in place and the same object is returned.

    A column is converted when it has a numeric dtype and every value equals
    its rounded value. Columns containing NaN never satisfy that check and
    are left untouched. Float columns containing +/-inf are skipped as well:
    inf equals its own rounding, so without the guard the integer cast would
    raise.

    Args:
        df: DataFrame whose columns should be inspected.

    Returns:
        The same DataFrame, with qualifying columns cast to ``int64``.
    """
    for col in df.columns:
        column = df[col]
        if not pd.api.types.is_numeric_dtype(column):
            continue
        # inf == round(inf), but it cannot be represented as an integer.
        if pd.api.types.is_float_dtype(column) and not np.isfinite(column).all():
            continue
        if column.eq(column.round()).all():
            # Explicit width: plain `int` is platform-dependent on older NumPy.
            df[col] = column.astype('int64')
    return df
# Meeting 4: attach each person's Label/Timestamp to the per-frame feature table.
outputDF = convert_whole_number_columns_to_int(pd.read_csv('mapje/meeting_4/9_temporaled.csv'))
outputDF = outputDF.assign(Label=np.nan, Timestamp=np.nan)

# Per-person label tables, tagged with the Person id they are merged on.
# NOTE(review): the ids skip 3 — presumably they mirror the Person ids in
# 9_temporaled.csv; confirm against that file.
simraDF = pd.read_csv('mapje/meeting_4/timestamped_Simra.csv').assign(Person=0)
karenDF = pd.read_csv('mapje/meeting_4/timestamped_Karen.csv').assign(Person=1)
lisaDF = pd.read_csv('mapje/meeting_4/timestamped_Lisa.csv').assign(Person=2)
gideonDF = pd.read_csv('mapje/meeting_4/timestamped_Gideon.csv').assign(Person=4)

tagged_frames = [('simra', simraDF), ('karen', karenDF), ('lisa', lisaDF), ('gideon', gideonDF)]
tags = [tag for tag, _ in tagged_frames]

# Left-merge on (Frame, Person); each person's columns get a '_<tag>' suffix.
for tag, person_labels in tagged_frames:
    outputDF = outputDF.merge(person_labels, on=['Frame', 'Person'], how='left', suffixes=('', f'_{tag}'))

# Collapse the suffixed columns: first non-null value in merge order,
# falling back to the (empty) base column.
for field in ('Label', 'Timestamp'):
    combined = outputDF[f'{field}_{tags[0]}']
    for tag in tags[1:]:
        combined = combined.combine_first(outputDF[f'{field}_{tag}'])
    outputDF[field] = combined.combine_first(outputDF[field])

# Remove the per-person helper columns.
outputDF = outputDF.drop(columns=[f'{field}_{tag}' for field in ('Label', 'Timestamp') for tag in tags])

# Show the merged table.
print(outputDF)

In [8]:
# Write one CSV per distinct Person id found in the merged meeting-4 table.
person_column = outputDF["Person"]
unique_person_ids = person_column.unique()

for person_id in unique_person_ids:
    person_rows = outputDF.loc[person_column == person_id]
    person_rows.to_csv(f'mapje/meeting_4/final_labeled_person_{person_id}.csv', index=False)

In [None]:
import pandas as pd
import numpy as np
def convert_whole_number_columns_to_int(df):
    """Cast numeric columns that hold only whole numbers to 64-bit integers.

    The frame is modified in place and the same object is returned.

    A column is converted when it has a numeric dtype and every value equals
    its rounded value. Columns containing NaN never satisfy that check and
    are left untouched. Float columns containing +/-inf are skipped as well:
    inf equals its own rounding, so without the guard the integer cast would
    raise.

    Args:
        df: DataFrame whose columns should be inspected.

    Returns:
        The same DataFrame, with qualifying columns cast to ``int64``.
    """
    for col in df.columns:
        column = df[col]
        if not pd.api.types.is_numeric_dtype(column):
            continue
        # inf == round(inf), but it cannot be represented as an integer.
        if pd.api.types.is_float_dtype(column) and not np.isfinite(column).all():
            continue
        if column.eq(column.round()).all():
            # Explicit width: plain `int` is platform-dependent on older NumPy.
            df[col] = column.astype('int64')
    return df
# Meeting 5: attach each person's Label/Timestamp to the per-frame feature table.
outputDF = convert_whole_number_columns_to_int(pd.read_csv('mapje/meeting_5/9_temporaled.csv'))
outputDF = outputDF.assign(Label=np.nan, Timestamp=np.nan)

# Per-person label tables, tagged with the Person id they are merged on.
# NOTE(review): the ids skip 3 — presumably they mirror the Person ids in
# 9_temporaled.csv; confirm against that file.
rockeyDF = pd.read_csv('mapje/meeting_5/timestamped_Rockey.csv').assign(Person=0)
michalinaDF = pd.read_csv('mapje/meeting_5/timestamped_Michalina.csv').assign(Person=1)
jeroenDF = pd.read_csv('mapje/meeting_5/timestamped_Jeroen.csv').assign(Person=2)
elenaDF = pd.read_csv('mapje/meeting_5/timestamped_Elena.csv').assign(Person=4)

tagged_frames = [('rockey', rockeyDF), ('michalina', michalinaDF), ('jeroen', jeroenDF), ('elena', elenaDF)]
tags = [tag for tag, _ in tagged_frames]

# Left-merge on (Frame, Person); each person's columns get a '_<tag>' suffix.
for tag, person_labels in tagged_frames:
    outputDF = outputDF.merge(person_labels, on=['Frame', 'Person'], how='left', suffixes=('', f'_{tag}'))

# Collapse the suffixed columns: first non-null value in merge order,
# falling back to the (empty) base column.
for field in ('Label', 'Timestamp'):
    combined = outputDF[f'{field}_{tags[0]}']
    for tag in tags[1:]:
        combined = combined.combine_first(outputDF[f'{field}_{tag}'])
    outputDF[field] = combined.combine_first(outputDF[field])

# Remove the per-person helper columns.
outputDF = outputDF.drop(columns=[f'{field}_{tag}' for field in ('Label', 'Timestamp') for tag in tags])

# Show the merged table.
print(outputDF)

In [14]:
# Write one CSV per distinct Person id found in the merged meeting-5 table.
person_column = outputDF["Person"]
unique_person_ids = person_column.unique()

for person_id in unique_person_ids:
    person_rows = outputDF.loc[person_column == person_id]
    person_rows.to_csv(f'mapje/meeting_5/final_labeled_person_{person_id}.csv', index=False)