In [3]:
import threading
import queue
import time
import keyboard
import queue
from model import *
import pytchat
from data import *

# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-classification", model="michellejieli/emotion_text_classifier")
# Load model directly
from transformers import AutoTokenizer, AutoModelForSequenceClassification

tokenizer = AutoTokenizer.from_pretrained("michellejieli/emotion_text_classifier")
model = AutoModelForSequenceClassification.from_pretrained("michellejieli/emotion_text_classifier")

# Define a function to set global variables
def set_globals(args):
    global PATIENCE, BATCH_SIZE,DATASET, BLOCK_SIZE, DROPOUT, LEARNING_RATE, EPOCHS, FRAMES_GENERATE, TRAIN, EVAL_EVERY, CHECKPOINT_PATH, L1_LAMBDA, L2_REG, FINETUNE, FINE_TUNING_LR, FINE_TUNING_EPOCHS, PENALTY, LATENT_VIS_EVERY, notes,USE_MDN
    BATCH_SIZE = args.BATCH_SIZE
    BLOCK_SIZE = args.BLOCK_SIZE
    DROPOUT = args.DROPOUT
    LEARNING_RATE = args.LEARNING_RATE
    EPOCHS = args.EPOCHS
    FRAMES_GENERATE = args.FRAMES_GENERATE
    TRAIN = args.TRAIN
    EVAL_EVERY = args.EVAL_EVERY
    CHECKPOINT_PATH = args.CHECKPOINT_PATH
    L1_LAMBDA = args.L1_LAMBDA
    L2_REG = args.L2_REG
    FINETUNE = args.FINETUNE
    FINE_TUNING_LR = args.FINE_TUNING_LR
    FINE_TUNING_EPOCHS = args.FINE_TUNING_EPOCHS
    PENALTY = args.PENALTY
    LATENT_VIS_EVERY = args.LATENT_VIS_EVERY
    USE_MDN = args.USE_MDN
    DATASET = args.DATASET
    notes = args.notes
    PATIENCE = args.PATIENCE
    
    # ---------------------------------
    notes = f"""Proto8 - trying to adapt Pette et al 2019, addign latent visualisation and analysing latent space. Might be slow, maybe take this out when live.

    
    Added MDN layer to model.
    
    All data, added 10% noise to emotions so model is less stuck. With LeakyRelu
    Loss = mse_loss(keypoints) + mse_loss(emotions) because before output emotions ( which feature was added to keypoint features) were not being matched to input emotions
    No penalty.

    Added dropout to keypoints, also changed input to emotion linear to x and not just emotion (emotion + keypoints)
    Taking extra dropout for emotions and keypoints out, because want model to rely on both equally so what's the point

    dropout keypoints and dropout emotion is currently equal but might change this.

    Emotions and keypoints are multimodal and added separately, but features are added in block processing using +.


    Got rid of both L1 and L2, increasing dropout because model acting weird, this is now delta + coord. 
    Delta is between next frame and current frame. So current frame is previous coord+previous delta. Last frame's delta is 0. 
    
    {BATCH_SIZE} batch size, {BLOCK_SIZE} block size, {DROPOUT} dropout, {LEARNING_RATE} learning rate, {EPOCHS} epochs, {FRAMES_GENERATE} frames generated, {TRAIN} train, {EVAL_EVERY} eval every, {CHECKPOINT_PATH} checkpoint path, {L1_LAMBDA} L1 lambda, {L2_REG} L2 reg"""
    # ---------------------------------
    
    # Print the values using f-string for formatting
    print(f"""
    Batch size set to: {BATCH_SIZE}
    Block size set to: {BLOCK_SIZE}
    Dropout rate set to: {DROPOUT}
    Learning rate set to: {LEARNING_RATE}
    Number of epochs set to: {EPOCHS}
    Frames to generate set to: {FRAMES_GENERATE}
    Training mode set to: {TRAIN}
    Evaluation every set to: {EVAL_EVERY}
    Checkpoint path set to: {CHECKPOINT_PATH}
    L1 regularization lambda set to: {L1_LAMBDA}
    L2 regularization lambda set to: {L2_REG}
    Fine-tuning mode set to: {FINETUNE}
    Fine-tuning learning rate set to: {FINE_TUNING_LR}
    Fine-tuning epochs set to: {FINE_TUNING_EPOCHS}
    Penalty flag set to: {PENALTY}
    Latent visualization every set to: {LATENT_VIS_EVERY}
    Use MDN flag set to: {USE_MDN}
    Dataset set to: {DATASET}
    Patience: {PATIENCE}
    """)
    

# initialise model------------------------------------------------------------


args = argparse.Namespace(
        BATCH_SIZE=8,
        BLOCK_SIZE=16,
        DROPOUT=0.2,
        LEARNING_RATE=0.0001,
        EPOCHS=30000,
        FRAMES_GENERATE=300,
        TRAIN=False,
        EVAL_EVERY=1000,
        CHECKPOINT_PATH="checkpoints/proto9_checkpoint.pth",
        L1_LAMBDA=None,
        L2_REG=0.0,
        FINETUNE=False,
        FINE_TUNING_LR=1e-5,
        FINE_TUNING_EPOCHS=100000,
        PENALTY=False,
        LATENT_VIS_EVERY=1000,
        USE_MDN = True,
        PATIENCE= 35, #multiple of EVAL_EVERY * 10 - no early stopping if patience =0
        DATASET = "all",
        
        # NOTES---------------------------------
        notes = f"""Proto8 - # Define  MDN loss scheduling parameters - 
         # Define  MDN loss scheduling parameters
         linear rate increase
            
        trying to adapt Pette et al 2019, addign latent visualisation and analysing latent space. Might be slow, maybe take this out when live.
        
        Added MDN to increase variance of output as Bishop et al 1994. and Alemi et al 2017.
        
        Scheduling MDN weight to increase over time for loss so that mse loss has better chance of converging first because otherwise MDN loss is overpowering it.
        Currently linear function but maybe change this to exponential. 
        
        Updated loss to loss = F.mse_loss(logits, targets) + (F.mse_loss(emotion_logits, emotions)) + mdn.mdn_loss(pi, sigma, mu, targets)
        see if that will help with noise
        
        
        convert from random sampling MDN to find the index of the most probable Gaussian component hopefully will lead to smoother outputs
        
        adjusted sampling to /100 of sigma, hopefully will lead to smoother outputs
        
        all data

        All data, added 10% noise to emotions so model is less stuck. With LeakyRelu
        Loss = mse_loss(keypoints) + mse_loss(emotions) because before output emotions ( which feature was added to keypoint features) were not being matched to input emotions
        No penalty.

        Added dropout to keypoints, also changed input to emotion linear to x and not just emotion (emotion + keypoints)
        Taking extra dropout for emotions and keypoints out, because want model to rely on both equally so what's the point

        dropout keypoints and dropout emotion is currently equal but might change this.

        Emotions and keypoints are multimodal and added separately, but features are added in block processing using +.


        Got rid of both L1 and L2, increasing dropout because model acting weird, this is now delta + coord. 
        Delta is between next frame and current frame. So current frame is previous coord+previous delta. Last frame's delta is 0. 
        """
    )
# If args are provided, use those; otherwise, parse from command line
if args is None:
    args = parse_args()

# Set the global variables based on args
set_globals(args)

# Set global variables

processed_data= prep_data(dataset=args.DATASET)
# global train_data,train_emotions, val_data, val_emotions, frame_dim, max_x, min_x, max_y, min_y, max_dx, min_dx, max_dy, min_dy, threshold
train_data, train_emotions, val_data, val_emotions, frame_dim, max_x, min_x, max_y, min_y, max_dx, min_dx, max_dy, min_dy, threshold = processed_data

# create model
# global m
m = MotionModel(input_dim=frame_dim, output_dim=frame_dim,emotion_dim=7, blocksize=args.BLOCK_SIZE, hidden_dim=512, n_layers=8, dropout=args.DROPOUT)
m = m.to(device)

optimizer = torch.optim.Adam(m.parameters(), lr=args.LEARNING_RATE, weight_decay=args.L2_REG)
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=3, verbose=True)
# Load the model
print('Loading model...')


m, optimizer, scheduler, epoch, loss, train_seed = load_checkpoint(m, optimizer, args.CHECKPOINT_PATH,scheduler)

try:

    if USE_MDN:
        print('MDN layer is used.')
        keypoints_loss, emotion_loss, mdn_loss = loss
        total_loss = keypoints_loss + emotion_loss + mdn_loss
    else:
        print('MDN layer is not used.')
        keypoints_loss, emotion_loss = loss
        total_loss = keypoints_loss + emotion_loss
    print(f"Model {train_seed} loaded from {CHECKPOINT_PATH} (epoch {epoch}, keypoints loss: {keypoints_loss:.6f}, emotion loss: {emotion_loss:.6f} , total loss: {total_loss:.6f})")

except TypeError:
    print(f"Model {train_seed} loaded from {CHECKPOINT_PATH} (epoch {epoch}, total loss: {loss:.6f})")

# Functions
def normalise_generated(unnorm_out, max_x, min_x, max_y, min_y, max_dx, min_dx, max_dy, min_dy): 
    norm_out = []
    
    for frame in unnorm_out:
        norm_frame = []
        
        # Normalize the first 50 values (absolute x and y coordinates)
        for i in range(0, 50, 2):
            unnormalized_x = frame[i]
            unnormalized_y = frame[i+1]
            
            norm_x = 2 * (unnormalized_x - min_x) / (max_x - min_x) - 1
            norm_y = 2 * (unnormalized_y - min_y) / (max_y - min_y) - 1
            
            norm_frame.extend([norm_x, norm_y])
        
        # Append the emotion encoding without normalizing
    
        norm_out.append(norm_frame)
        
    return norm_out


# Initial setup
shared_data = {
    'average_scores': [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
}

# different from normal emotion labels - matches the sentiment analyser
emotion_labels = ['anger', 'disgust', 'fear', 'joy', 'neutral', 'sadness', 'surprise']

emotion_data = {emotion: {"score": 0.0, "count": 0} for emotion in emotion_labels}

chat = pytchat.create(video_id="gCNeDWCI0vo")
FRAMES_GENERATE = 150
terminate_threads = False

# This queue will hold the batches ready for visualization
viz_queue = queue.Queue()

def process_chat_message(c):
    """Process a chat message and update emotion scores."""
    print(f"{c.datetime} [{c.author.name}]- {c.message}")
    result = pipe(c.message)  # Assuming pipe() returns emotion prediction
    print(result)

    detected_emotion = result[0]['label']

    # Reset the counter for the detected emotion and boost its score
    emotion_data[detected_emotion]["count"] = 0
    score = result[0]['score']
    emotion_data[detected_emotion]["score"] = min(1, emotion_data[detected_emotion]["score"] + score)

    # Decay scores for other emotions and increase their counters
    for emotion, data in emotion_data.items():
        if emotion != detected_emotion:
            data["count"] += 1
            if data["count"] >= 5:
                data["score"] = 0
            else:
                data["score"] *= 0.5  # or any other decay factor you prefer

    # Normalize the scores so they add up to 1
    total_score = sum(data["score"] for data in emotion_data.values())
    if total_score > 0:
        for emotion in emotion_labels:
            emotion_data[emotion]["score"] /= total_score

    # Update average scores
    for i, emotion in enumerate(emotion_labels):
        shared_data['average_scores'][i] = emotion_data[emotion]["score"]

    print("Average scores:", shared_data['average_scores'])

# Batch generation function
def generate_new_batch(last_frame=None):
    """Generate a new batch based on the current average scores."""
    # If initial_data is None or empty, initialize with default values
    if last_frame is None:
        print('LAST FRAME IS NONE')
        last_frame = torch.randn(1,5, 50).to(device)  # initialise with noise

    last_frames = last_frame[0][-3:]
    norm_last_frames = normalise_generated(last_frames, max_x, min_x, max_y, min_y, max_x, min_x, max_y, min_y)
    new_input = torch.tensor([norm_last_frames]).to(device).float()
    emotion_in = torch.tensor([shared_data['average_scores']]).to(device).float()

    # Generate the new frames
    generated_keypoints, generated_emotion = m.generate(new_input, emotion_in, FRAMES_GENERATE)
    
    emotion_vectors = (emotion_in, generated_emotion)
    return unnormalise_list_2D(generated_keypoints, max_x, min_x, max_y, min_y, max_x, min_x, max_y, min_y), emotion_vectors

def generate_batches_periodically(period=2, last_frame=None):
    # initialise with last_frame = None
    while not terminate_threads:  
        time.sleep(period)
        unnorm_out, emotion_vectors = generate_new_batch(last_frame)
        viz_queue.put((unnorm_out, emotion_vectors))  
        last_frame = unnorm_out
        

def visualise(unnorm_out, emotion_vectors):
    # visualize
    emotion_in, generated_emotion = emotion_vectors 
    emotion_vectors = (emotion_in[0], generated_emotion[0]) #quick fix
    
    visualise_skeleton(unnorm_out[0], max_x, max_y, emotion_vectors,max_frames=FRAMES_GENERATE,save = False,save_path=None,prefix=f'{EPOCHS}_main_test',train_seed=train_seed,delta=False,destroy=False)

def visualise_batches():
    while not terminate_threads:  # Check the global termination flag
        batch = viz_queue.get()  # Get the tuple from the queue
        if batch is None:  # Check if the thread should terminate
            break
        unnorm_out, emotion_vectors = batch  # Unpack the tuple
        visualise(unnorm_out, emotion_vectors)

# Start the threads
visualisation_thread = threading.Thread(target=visualise_batches, daemon=True)
generation_thread = threading.Thread(target=generate_batches_periodically, args=(10,), daemon=True)

visualisation_thread.start()
generation_thread.start()


# Process chat messages
while chat.is_alive():
    if keyboard.is_pressed('esc'):  # Check if ESC key is pressed
        terminate_threads = True
        viz_queue.put(None)  # Put a None in the queue to signal the visualisation thread to terminate
        break  # Exit the main loop
    for c in chat.get().sync_items():
        process_chat_message(c)

cv2.destroyAllWindows()

# Wait for threads to finish if needed
visualisation_thread.join()
generation_thread.join()


    Batch size set to: 8
    Block size set to: 16
    Dropout rate set to: 0.2
    Learning rate set to: 0.0001
    Number of epochs set to: 30000
    Frames to generate set to: 300
    Training mode set to: False
    Evaluation every set to: 1000
    Checkpoint path set to: checkpoints/proto9_checkpoint.pth
    L1 regularization lambda set to: None
    L2 regularization lambda set to: 0.0
    Fine-tuning mode set to: False
    Fine-tuning learning rate set to: 1e-05
    Fine-tuning epochs set to: 100000
    Penalty flag set to: False
    Latent visualization every set to: 1000
    Use MDN flag set to: True
    Dataset set to: all
    Patience: 35
    
Preparing data for all...


100%|██████████| 4102/4102 [00:04<00:00, 1010.52it/s]


Validating interpolation...
No errors found!


4102it [00:01, 2219.69it/s]
4102it [00:01, 2215.37it/s]
100%|██████████| 109/109 [00:03<00:00, 27.99it/s]


Validating interpolation...
No errors found!


109it [00:00, 286.28it/s]
109it [00:00, 280.17it/s]


Creating keypoint frames...


100%|██████████| 4211/4211 [00:02<00:00, 1727.03it/s]


Creating keypoint frames...


100%|██████████| 4211/4211 [00:02<00:00, 1636.29it/s]


Validating length of dkp_frames
Validating length of kp_frames
frame_dim: 50
Loading model...
Loading checkpoint...
Checkpoint loaded from checkpoints/proto9_checkpoint.pth
MDN layer is used.
Model 41831 loaded from checkpoints/proto9_checkpoint.pth (epoch 30000, total loss: -3.157223)
2023-11-16 22:57:55 [Michael Ryan]- Germany supports antisemitism as Arabs are Semites. Germany killed millions of people lots were Jews they can’t talk about antisemitism they are just because the war ended antisemitism didn’t 
[{'label': 'neutral', 'score': 0.37312617897987366}]
Average scores: [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0]
LAST FRAME IS NONE


Exception in thread Thread-7 (generate_batches_periodically):
Traceback (most recent call last):
  File "c:\Users\avika\anaconda3\envs\interactive_dance_thesis\lib\threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "c:\Users\avika\anaconda3\envs\interactive_dance_thesis\lib\threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "C:\Users\avika\AppData\Local\Temp\ipykernel_27428\758117282.py", line 303, in generate_batches_periodically
  File "C:\Users\avika\AppData\Local\Temp\ipykernel_27428\758117282.py", line 294, in generate_new_batch
  File "c:\Users\avika\OneDrive\Documents\UAL\interactive_dance_thesis\model.py", line 304, in generate
    pi, sigma, mu, logits, emotion_logits, _, _ = self(inputs=cond_sequence, emotions=generated_emotions)
  File "c:\Users\avika\anaconda3\envs\interactive_dance_thesis\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "c:\Users\avika\

2023-11-16 22:58:02 [Mary M.]- Why does Hamas hide behind children?
[{'label': 'neutral', 'score': 0.5105586647987366}]
Average scores: [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0]
2023-11-16 22:58:11 [Paperchase 4eva]- ALLAH IS IMPOTENT HE CANT MAKE A SON OUR GOD CAN DO ALL THINGS JESUS IS THE SON OF GOD
[{'label': 'anger', 'score': 0.8048244714736938}]
Average scores: [0.6168066962790095, 0.0, 0.0, 0.0, 0.3831933037209904, 0.0, 0.0]
2023-11-16 22:58:15 [Adriana Bonita Aziz]- Shaytanyahu
[{'label': 'neutral', 'score': 0.9467083215713501}]
Average scores: [0.23570969042386014, 0.0, 0.0, 0.0, 0.7642903095761399, 0.0, 0.0]
2023-11-16 22:58:20 [Nimo Nino]- 48 hours lol what they doing digging tunnel
[{'label': 'neutral', 'score': 0.8934422731399536}]
Average scores: [0.10542947120257495, 0.0, 0.0, 0.0, 0.894570528797425, 0.0, 0.0]
2023-11-16 22:58:24 [Abu Nusaybah]- :heart_suit:🇵🇸:heart_suit:🇵🇸:heart_suit:🇵🇸:heart_suit:🇵🇸:heart_suit:🇵🇸HASBUNALLAHU WA NI'MAL WAKEEL:heart_suit:🇵🇸:heart_suit:🇵🇸:heart

In [1]:
# load and generate each emotion separately

import threading
import queue
import time
import keyboard
import queue
from model import *
import pytchat
from data import *

# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-classification", model="michellejieli/emotion_text_classifier")
# Load model directly
from transformers import AutoTokenizer, AutoModelForSequenceClassification

tokenizer = AutoTokenizer.from_pretrained("michellejieli/emotion_text_classifier")
model = AutoModelForSequenceClassification.from_pretrained("michellejieli/emotion_text_classifier")

# Define a function to set global variables
def set_globals(args):
    global PATIENCE, BATCH_SIZE,DATASET, BLOCK_SIZE, DROPOUT, LEARNING_RATE, EPOCHS, FRAMES_GENERATE, TRAIN, EVAL_EVERY, CHECKPOINT_PATH, L1_LAMBDA, L2_REG, FINETUNE, FINE_TUNING_LR, FINE_TUNING_EPOCHS, PENALTY, LATENT_VIS_EVERY, notes,USE_MDN
    BATCH_SIZE = args.BATCH_SIZE
    BLOCK_SIZE = args.BLOCK_SIZE
    DROPOUT = args.DROPOUT
    LEARNING_RATE = args.LEARNING_RATE
    EPOCHS = args.EPOCHS
    FRAMES_GENERATE = args.FRAMES_GENERATE
    TRAIN = args.TRAIN
    EVAL_EVERY = args.EVAL_EVERY
    CHECKPOINT_PATH = args.CHECKPOINT_PATH
    L1_LAMBDA = args.L1_LAMBDA
    L2_REG = args.L2_REG
    FINETUNE = args.FINETUNE
    FINE_TUNING_LR = args.FINE_TUNING_LR
    FINE_TUNING_EPOCHS = args.FINE_TUNING_EPOCHS
    PENALTY = args.PENALTY
    LATENT_VIS_EVERY = args.LATENT_VIS_EVERY
    USE_MDN = args.USE_MDN
    DATASET = args.DATASET
    notes = args.notes
    PATIENCE = args.PATIENCE
    
    # ---------------------------------
    notes = f"""Proto8 - trying to adapt Pette et al 2019, addign latent visualisation and analysing latent space. Might be slow, maybe take this out when live.

    
    Added MDN layer to model.
    
    All data, added 10% noise to emotions so model is less stuck. With LeakyRelu
    Loss = mse_loss(keypoints) + mse_loss(emotions) because before output emotions ( which feature was added to keypoint features) were not being matched to input emotions
    No penalty.

    Added dropout to keypoints, also changed input to emotion linear to x and not just emotion (emotion + keypoints)
    Taking extra dropout for emotions and keypoints out, because want model to rely on both equally so what's the point

    dropout keypoints and dropout emotion is currently equal but might change this.

    Emotions and keypoints are multimodal and added separately, but features are added in block processing using +.


    Got rid of both L1 and L2, increasing dropout because model acting weird, this is now delta + coord. 
    Delta is between next frame and current frame. So current frame is previous coord+previous delta. Last frame's delta is 0. 
    
    {BATCH_SIZE} batch size, {BLOCK_SIZE} block size, {DROPOUT} dropout, {LEARNING_RATE} learning rate, {EPOCHS} epochs, {FRAMES_GENERATE} frames generated, {TRAIN} train, {EVAL_EVERY} eval every, {CHECKPOINT_PATH} checkpoint path, {L1_LAMBDA} L1 lambda, {L2_REG} L2 reg"""
    # ---------------------------------
    
    # Print the values using f-string for formatting
    print(f"""
    Batch size set to: {BATCH_SIZE}
    Block size set to: {BLOCK_SIZE}
    Dropout rate set to: {DROPOUT}
    Learning rate set to: {LEARNING_RATE}
    Number of epochs set to: {EPOCHS}
    Frames to generate set to: {FRAMES_GENERATE}
    Training mode set to: {TRAIN}
    Evaluation every set to: {EVAL_EVERY}
    Checkpoint path set to: {CHECKPOINT_PATH}
    L1 regularization lambda set to: {L1_LAMBDA}
    L2 regularization lambda set to: {L2_REG}
    Fine-tuning mode set to: {FINETUNE}
    Fine-tuning learning rate set to: {FINE_TUNING_LR}
    Fine-tuning epochs set to: {FINE_TUNING_EPOCHS}
    Penalty flag set to: {PENALTY}
    Latent visualization every set to: {LATENT_VIS_EVERY}
    Use MDN flag set to: {USE_MDN}
    Dataset set to: {DATASET}
    Patience: {PATIENCE}
    """)
    

# initialise model------------------------------------------------------------


args = argparse.Namespace(
        BATCH_SIZE=8,
        BLOCK_SIZE=16,
        DROPOUT=0.2,
        LEARNING_RATE=0.0001,
        EPOCHS=30000,
        FRAMES_GENERATE=300,
        TRAIN=False,
        EVAL_EVERY=1000,
        CHECKPOINT_PATH="checkpoints/proto9_checkpoint_emotion3.pth",
        L1_LAMBDA=None,
        L2_REG=0.0,
        FINETUNE=False,
        FINE_TUNING_LR=1e-5,
        FINE_TUNING_EPOCHS=100000,
        PENALTY=False,
        LATENT_VIS_EVERY=1000,
        USE_MDN = True,
        PATIENCE= 35, #multiple of EVAL_EVERY * 10 - no early stopping if patience =0
        DATASET = "all",
        
        # NOTES---------------------------------
        notes = f"""Proto8 - # Define  MDN loss scheduling parameters - 
         # Define  MDN loss scheduling parameters
         linear rate increase
            
        trying to adapt Pette et al 2019, addign latent visualisation and analysing latent space. Might be slow, maybe take this out when live.
        
        Added MDN to increase variance of output as Bishop et al 1994. and Alemi et al 2017.
        
        Scheduling MDN weight to increase over time for loss so that mse loss has better chance of converging first because otherwise MDN loss is overpowering it.
        Currently linear function but maybe change this to exponential. 
        
        Updated loss to loss = F.mse_loss(logits, targets) + (F.mse_loss(emotion_logits, emotions)) + mdn.mdn_loss(pi, sigma, mu, targets)
        see if that will help with noise
        
        
        convert from random sampling MDN to find the index of the most probable Gaussian component hopefully will lead to smoother outputs
        
        adjusted sampling to /100 of sigma, hopefully will lead to smoother outputs
        
        all data

        All data, added 10% noise to emotions so model is less stuck. With LeakyRelu
        Loss = mse_loss(keypoints) + mse_loss(emotions) because before output emotions ( which feature was added to keypoint features) were not being matched to input emotions
        No penalty.

        Added dropout to keypoints, also changed input to emotion linear to x and not just emotion (emotion + keypoints)
        Taking extra dropout for emotions and keypoints out, because want model to rely on both equally so what's the point

        dropout keypoints and dropout emotion is currently equal but might change this.

        Emotions and keypoints are multimodal and added separately, but features are added in block processing using +.


        Got rid of both L1 and L2, increasing dropout because model acting weird, this is now delta + coord. 
        Delta is between next frame and current frame. So current frame is previous coord+previous delta. Last frame's delta is 0. 
        """
    )
# If args are provided, use those; otherwise, parse from command line
if args is None:
    args = parse_args()

# Set the global variables based on args
set_globals(args)

# Set global variables

processed_data= prep_data(dataset=args.DATASET)
# global train_data,train_emotions, val_data, val_emotions, frame_dim, max_x, min_x, max_y, min_y, max_dx, min_dx, max_dy, min_dy, threshold
train_data, train_emotions, val_data, val_emotions, frame_dim, max_x, min_x, max_y, min_y, max_dx, min_dx, max_dy, min_dy, threshold = processed_data

# create model
# global m
m = MotionModel(input_dim=frame_dim, output_dim=frame_dim,emotion_dim=7, blocksize=args.BLOCK_SIZE, hidden_dim=512, n_layers=8, dropout=args.DROPOUT)
m = m.to(device)

optimizer = torch.optim.Adam(m.parameters(), lr=args.LEARNING_RATE, weight_decay=args.L2_REG)
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=3, verbose=True)
# Load the model
print('Loading model...')


m, optimizer, scheduler, epoch, loss, train_seed = load_checkpoint(m, optimizer, args.CHECKPOINT_PATH,scheduler)

try:

    if USE_MDN:
        print('MDN layer is used.')
        keypoints_loss, emotion_loss, mdn_loss = loss
        total_loss = keypoints_loss + emotion_loss + mdn_loss
    else:
        print('MDN layer is not used.')
        keypoints_loss, emotion_loss = loss
        total_loss = keypoints_loss + emotion_loss
    print(f"Model {train_seed} loaded from {CHECKPOINT_PATH} (epoch {epoch}, keypoints loss: {keypoints_loss:.6f}, emotion loss: {emotion_loss:.6f} , total loss: {total_loss:.6f})")

except TypeError:
    print(f"Model {train_seed} loaded from {CHECKPOINT_PATH} (epoch {epoch}, total loss: {loss:.6f})")

# Functions
def normalise_generated(unnorm_out, max_x, min_x, max_y, min_y, max_dx, min_dx, max_dy, min_dy): 
    norm_out = []
    
    for frame in unnorm_out:
        norm_frame = []
        
        # Normalize the first 50 values (absolute x and y coordinates)
        for i in range(0, 50, 2):
            unnormalized_x = frame[i]
            unnormalized_y = frame[i+1]
            
            norm_x = 2 * (unnormalized_x - min_x) / (max_x - min_x) - 1
            norm_y = 2 * (unnormalized_y - min_y) / (max_y - min_y) - 1
            
            norm_frame.extend([norm_x, norm_y])
        
        # Append the emotion encoding without normalizing
    
        norm_out.append(norm_frame)
        
    return norm_out

emotion_labels = ['anger', 'disgust', 'fear', 'joy', 'neutral', 'sadness', 'surprise']

# Generate one-hot encodings for each emotion
num_emotions = len(emotion_labels)
emotions = {emotion: [1 if i == index else 0 for i in range(num_emotions)] 
                    for index, emotion in enumerate(emotion_labels)}

last_frame = None

for label, emotion in emotions.items():
    # different from normal emotion labels - matches the sentiment analyser
    
    print(emotion)
  
    last_frame = torch.randn(1,3, 50).to(device)  # initialise with noise

    last_frames = last_frame
    # norm_last_frames = normalise_generated(last_frames, max_x, min_x, max_y, min_y, max_x, min_x, max_y, min_y)
    new_input = torch.tensor(last_frames).to(device)
    emotion_in = torch.tensor([emotion]).to(device).float()

    # Generate the new frames
    generated_keypoints, generated_emotion = m.generate(new_input, emotion_in, FRAMES_GENERATE)
    

    emotion_vectors = (emotion_in[0], generated_emotion[0])
    unnorm_out = unnormalise_list_2D(generated_keypoints, max_x, min_x, max_y, min_y, max_x, min_x, max_y, min_y)
    
    # visualise_skeleton(unnorm_out[0], max_x, max_y, emotion_vectors,max_frames=FRAMES_GENERATE,save = True,save_path=None,prefix=f'{label}_{EPOCHS}_main_test',train_seed=train_seed,delta=False,destroy=False)

    

C:\Users\avika\OneDrive\Documents\UAL\interactive_dance_thesis

    Batch size set to: 8
    Block size set to: 16
    Dropout rate set to: 0.2
    Learning rate set to: 0.0001
    Number of epochs set to: 30000
    Frames to generate set to: 300
    Training mode set to: False
    Evaluation every set to: 1000
    Checkpoint path set to: checkpoints/proto9_checkpoint_emotion3.pth
    L1 regularization lambda set to: None
    L2 regularization lambda set to: 0.0
    Fine-tuning mode set to: False
    Fine-tuning learning rate set to: 1e-05
    Fine-tuning epochs set to: 100000
    Penalty flag set to: False
    Latent visualization every set to: 1000
    Use MDN flag set to: True
    Dataset set to: all
    Patience: 35
    
Preparing data for all...


100%|██████████| 1326/1326 [00:01<00:00, 1104.44it/s]


Validating interpolation...
No errors found!


1326it [00:00, 2279.45it/s]
1326it [00:00, 2407.49it/s]
100%|██████████| 153/153 [00:08<00:00, 18.26it/s]


Validating interpolation...
No errors found!


153it [00:00, 199.65it/s]
153it [00:00, 197.75it/s]


In [None]:
import json

# Assuming unnorm_out[0] is your data
data_to_save = unnorm_out[0]

# Save data to a JSON file
with open('data.json', 'w') as file:
    json.dump(data_to_save, file)


In [6]:
import pyglet
import math
import time
from pyglet.shapes import Circle

def visualise_body(all_frames, max_x, max_y, max_frames=500):
    # Pyglet window initialization
    window = pyglet.window.Window(int(max_x) + 50, int(max_y) + 50)

    # Define the limb connections based on keypoints
    limb_connections = [
        (0, 1), (1, 2), (2, 3), (3, 4),
        (1, 5), (5, 6), (6, 7),
        (1, 8), (8, 9), (9, 10), (10, 11),
        (8, 12), (12, 13), (13, 14),
        (0, 15), (15, 16), (0, 17), (17, 18),
        (14, 19), (14, 20), (14, 21),
        (11, 22), (11, 23), (11, 24)
    ]

    # Load the same image for all limb connections
    limb_image_path = 'data/leg.png'
    limb_sprites = {limb: pyglet.sprite.Sprite(pyglet.image.load(limb_image_path)) for limb in limb_connections}

    # Function to draw each frame
    def draw_frame(frame_data):
        
        for i in range(25):  # Assuming 25 keypoints
            x = frame_data[i * 2]
            y = frame_data[i * 2 + 1]
            circle = Circle(x, y, 5, color=(0, 255, 0))
            circle.draw()
            
        for limb, sprite in limb_sprites.items():
            start_idx, end_idx = limb
            start_x, start_y = frame_data[start_idx * 2], frame_data[start_idx * 2 + 1]
            end_x, end_y = frame_data[end_idx * 2], frame_data[end_idx * 2 + 1]

            # Calculate midpoint, angle, and distance
            mid_x, mid_y = (start_x + end_x) / 2, (start_y + end_y) / 2
            angle = math.atan2(end_y - start_y, end_x - start_x)
            distance = math.sqrt((end_x - start_x) ** 2 + (end_y - start_y) ** 2)

            # Set sprite properties
            sprite.x, sprite.y = mid_x - sprite.width / 2, mid_y - sprite.height / 2
            sprite.rotation = -math.degrees(angle)
            
            if sprite.width ==0:
                sprite.width = 1
                print('width is 0')
            
            sprite.scale = distance / max(sprite.width, sprite.height)
            

            sprite.draw()


    # Pyglet draw event
    @window.event
    def on_draw():
        window.clear()
        if frame_index < len(all_frames):
            draw_frame(all_frames[frame_index])

    # Update function for animation
    def update(dt):
        nonlocal frame_index
        frame_index += 1
        if frame_index >= len(all_frames):
            pyglet.app.exit()

    # Schedule update
    pyglet.clock.schedule_interval(update, 1)

    # Run the Pyglet application
    frame_index = 0
    pyglet.app.run()

# Example usage
all_frames = unnorm_out[0] # Your frames data


visualise_body(all_frames, max_x, max_y)


width is 0
width is 0
width is 0
width is 0
width is 0
width is 0
width is 0
width is 0
width is 0
width is 0
width is 0
width is 0
width is 0
width is 0
width is 0
width is 0
width is 0
width is 0
width is 0
width is 0
width is 0
width is 0
width is 0
width is 0
width is 0
width is 0
width is 0
width is 0
width is 0
width is 0
width is 0
width is 0
width is 0
width is 0
width is 0
width is 0
width is 0
width is 0
width is 0
width is 0
width is 0
width is 0
width is 0
width is 0
width is 0
width is 0


ZeroDivisionError: float division by zero

: 

In [20]:
# Example to check the first frame's structure
print("First frame:", all_frames[0])
print("Type of first frame:", type(all_frames[0]))
print("Length of first frame:", len(all_frames[0]))
if len(all_frames[0]) > 0:
    print("First element of the first frame:", all_frames[0][0])
    print("Type of the first element:", type(all_frames[0][0]))


First frame: [463.20245361328125, 560.21630859375, 489.4288635253906, 625.7183227539062, 56.09849548339844, 694.9973754882812, 645.4338989257812, 718.0498046875, -47.0558967590332, 1134.4161376953125, -103.01863098144531, 684.0560913085938, 81.345458984375, 640.783935546875, -60.83053207397461, 282.6039733886719, 544.1846923828125, -324.26806640625, 635.161376953125, 352.4945068359375, 341.3075256347656, 454.3798828125, 481.49658203125, 377.8548889160156, 350.2923583984375, 1350.7640380859375, -222.98667907714844, 78.2162094116211, 736.5838012695312, 1047.74169921875, -5.331160068511963, 197.80421447753906, -443.57110595703125, 642.7324829101562, 94.9349594116211, 1221.33251953125, 365.9841613769531, 654.0223388671875, 497.1720886230469, 951.89697265625, -363.64410400390625, 600.9287109375, 503.2513732910156, 1184.36181640625, -106.8148422241211, 533.2833251953125, 315.84149169921875, -262.8980407714844, -173.64852905273438, 865.5286865234375, 1.3745900392532349, -1.2749416828155518, 0

In [10]:
unnorm_out[0]

[[463.20245361328125,
  560.21630859375,
  489.4288635253906,
  625.7183227539062,
  56.09849548339844,
  694.9973754882812,
  645.4338989257812,
  718.0498046875,
  -47.0558967590332,
  1134.4161376953125,
  -103.01863098144531,
  684.0560913085938,
  81.345458984375,
  640.783935546875,
  -60.83053207397461,
  282.6039733886719,
  544.1846923828125,
  -324.26806640625,
  635.161376953125,
  352.4945068359375,
  341.3075256347656,
  454.3798828125,
  481.49658203125,
  377.8548889160156,
  350.2923583984375,
  1350.7640380859375,
  -222.98667907714844,
  78.2162094116211,
  736.5838012695312,
  1047.74169921875,
  -5.331160068511963,
  197.80421447753906,
  -443.57110595703125,
  642.7324829101562,
  94.9349594116211,
  1221.33251953125,
  365.9841613769531,
  654.0223388671875,
  497.1720886230469,
  951.89697265625,
  -363.64410400390625,
  600.9287109375,
  503.2513732910156,
  1184.36181640625,
  -106.8148422241211,
  533.2833251953125,
  315.84149169921875,
  -262.8980407714844,


tensor([[0., 0., 0., 0., 0., 0., 0.]], device='cuda:0',
       grad_fn=<ReluBackward0>)


: 

In [1]:
import pygame
from pygame.locals import *
from OpenGL.GL import *
from OpenGL.GLUT import *
from OpenGL.GLU import *

def rotate_3dpoint(p, angle, axis):
	"""Rotate a 3D point around given axis."""
	ret = [0, 0, 0]
	cosang = cos(angle)
	sinang = sin(angle)
	ret[0] += (cosang+(1-cosang)*axis[0]*axis[0])*p[0]
	ret[0] += ((1-cosang)*axis[0]*axis[1]-axis[2]*sinang)*p[1]
	ret[0] += ((1-cosang)*axis[0]*axis[2]+axis[1]*sinang)*p[2]
	ret[1] += ((1-cosang)*axis[0]*axis[1]+axis[2]*sinang)*p[0]
	ret[1] += (cosang+(1-cosang)*axis[1]*axis[1])*p[1]
	ret[1] += ((1-cosang)*axis[1]*axis[2]-axis[0]*sinang)*p[2]
	ret[2] += ((1-cosang)*axis[0]*axis[2]-axis[1]*sinang)*p[0]
	ret[2] += ((1-cosang)*axis[1]*axis[2]+axis[0]*sinang)*p[1]
	ret[2] += (cosang+(1-cosang)*axis[2]*axis[2])*p[2]
	return ret

def rotate_object(obj, angle, axis):
	"""Rotate an object around given axis."""
	for i in range(len(obj)):
		obj[i] = rotate_3dpoint(obj[i], angle, axis)

class OBJ:
    def __init__(self, filename):
        self.vertices = []
        self.faces = []
        self.load_obj(filename)

    def load_obj(self, filename):
        for line in open(filename, "r"):
            if line.startswith('#'): continue
            values = line.split()
            if not values: continue

            if values[0] == 'v':
                self.vertices.append(list(map(float, values[1:4])))
            elif values[0] == 'f':
                face = []
                for v in values[1:]:
                    w = v.split('/')
                    face.append(int(w[0]))
                self.faces.append(face)

    def render(self):
        glBegin(GL_TRIANGLES)
        for face in self.faces:
            for vertex in face:
                glVertex3fv(self.vertices[vertex - 1])
        glEnd()

def main():
    pygame.init()
    display = (800,600)
    pygame.display.set_mode(display, DOUBLEBUF|OPENGL)
    gluPerspective(45, (display[0]/display[1]), 0.1, 50.0)
    glTranslatef(0.0,0.0, -5)

    obj = OBJ("data/human_mesh.obj")  # Replace 'yourmodel.obj' with your file name

    while True:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                quit()
                
                obj.render()

        glClear(GL_COLOR_BUFFER_BIT|GL_DEPTH_BUFFER_BIT)
        glLoadIdentity()  # Reset the current modelview matrix
        # glTranslatef(0.0, 0.0, -5)  # Move back
        # glScalef(0.5, 0.5, 0.5)  # Scale down the model
        # glRotatef(1, 3, 1, 1)  # Rotate
        
        obj.render()
        pygame.display.flip()
        pygame.time.wait(10)

if __name__ == "__main__":
    main()


pygame 2.5.2 (SDL 2.28.3, Python 3.10.9)
Hello from the pygame community. https://www.pygame.org/contribute.html


NameError: name 'cos' is not defined

In [1]:
import pygame
from pygame.locals import *
from OpenGL.GL import *
from OpenGL.GLUT import *
from OpenGL.GLU import *

class OBJ:
    def __init__(self, filename):
        self.vertices = []
        self.faces = []
        self.load_obj(filename)

    def load_obj(self, filename):
        for line in open(filename, "r"):
            if line.startswith('#'): continue
            values = line.split()
            if not values: continue

            if values[0] == 'v':
                vertex = list(map(float, values[1:4]))
                self.vertices.append(vertex)
                print(f"Vertex: {vertex}")  # Print vertex coordinates
            elif values[0] == 'f':
                face = []
                for v in values[1:]:
                    w = v.split('/')
                    face.append(int(w[0]))
                self.faces.append(face)

    def render(self):
        glBegin(GL_TRIANGLES)
        for face in self.faces:
            for vertex in face:
                glVertex3fv(self.vertices[vertex - 1])
        glEnd()

def main():
    pygame.init()
    display = (800, 600)
    pygame.display.set_mode(display, DOUBLEBUF|OPENGL)
    gluPerspective(45, (display[0]/display[1]), 0.1, 50.0)

    camera_x, camera_y, camera_z = 0, 0, -5  # Initial camera position

    obj = OBJ("data/human_mesh.obj")  # Replace with your file name

    while True:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                quit()

            # Keyboard events to move the camera
            if event.type == pygame.KEYDOWN:
                if event.key == pygame.K_LEFT:
                    camera_x -= 1
                if event.key == pygame.K_RIGHT:
                    camera_x += 1
                if event.key == pygame.K_UP:
                    camera_y += 1
                if event.key == pygame.K_DOWN:
                    camera_y -= 1
                if event.key == pygame.K_w:
                    camera_z += 1
                if event.key == pygame.K_s:
                    camera_z -= 1

        print(f"Camera position: x={camera_x}, y={camera_y}, z={camera_z}")  # Print camera position

        glClear(GL_COLOR_BUFFER_BIT|GL_DEPTH_BUFFER_BIT)
        glLoadIdentity()
        glTranslatef(camera_x, camera_y, camera_z)
        # glScalef(0.01, 0.01, 0.01)  # Scale down
        obj.render()
        pygame.display.flip()
        pygame.time.wait(10)

if __name__ == "__main__":
    main()


pygame 2.5.2 (SDL 2.28.3, Python 3.10.9)
Hello from the pygame community. https://www.pygame.org/contribute.html
Vertex: [-2.046645, 3.723751, 0.234023]
Vertex: [-2.033502, 3.694018, 0.320598]
Vertex: [-1.954756, 3.779671, 0.283552]
Vertex: [-1.981371, 3.811183, 0.208978]
Vertex: [-1.99044, 3.782394, 0.131055]
Vertex: [-2.035621, 3.710389, 0.151686]
Vertex: [-1.963693, 3.713332, 0.09021]
Vertex: [-2.003813, 3.670851, 0.097007]
Vertex: [-1.92891, 3.675715, 0.09525]
Vertex: [-1.969666, 3.632631, 0.088739]
Vertex: [-1.944764, 3.587394, 0.137247]
Vertex: [-1.895553, 3.655688, 0.134041]
Vertex: [-1.861192, 3.56594, 0.347554]
Vertex: [-1.864283, 3.652548, 0.220254]
Vertex: [-1.906357, 3.703779, 0.323299]
Vertex: [-1.924495, 3.590654, 0.409698]
Vertex: [-2.120802, 3.619246, 0.357665]
Vertex: [-2.134044, 3.633794, 0.258222]
Vertex: [-2.196639, 3.558287, 0.282322]
Vertex: [-2.184078, 3.554246, 0.385861]
Vertex: [-2.124865, 3.624176, 0.161567]
Vertex: [-2.090089, 3.590455, 0.097697]
Vertex: [-2.

GLError: GLError(
	err = 1282,
	description = b'invalid operation',
	baseOperation = glClear,
	cArguments = (16640,)
)

: 

In [17]:
import pyglet

from pyglet.gl import *

glEnable(GL_BLEND)
glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA)



# Initialize a Pyglet window
window = pyglet.window.Window(width=800, height=600, caption='Transparent Image Display')

window.config.alpha_size = 8

# Load the transparent image
# Ensure the image path is correct and the image supports transparency (like PNG)
image_path = 'data/leg.png'
image = pyglet.resource.image(image_path)

# The on_draw event is triggered to draw the window's content
@window.event
def on_draw():
    window.clear()  # Clear the window
    image.blit(200, 50)  # Draw the image at position (100, 100)

# Run the application
pyglet.app.run()


# Playing with rendering

In [1]:
import pyglet
import pyglet.clock
from pyglet.gl import *

window = pyglet.window.Window()

window.config.alpha_size = 8

#fancy text
text = pyglet.resource.image("data/leg.png")

# #background image
bg = pyglet.resource.image("data/smplx/test_2.jpg")

@window.event
def on_draw():
    window.clear()
    glEnable(GL_BLEND)

    glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA)
    bg.blit(0, 0)
    glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA)
    text.blit(100, 100)

pyglet.app.run()

In [3]:
import pyglet
from pyglet.gl import *
import numpy as np
from PIL import Image
import random

# Initialize Pyglet
pyglet.options['shadow_window'] = False  # Disable shadow window
pyglet.gl.glConfig.set_hardware_acceleration("None")  # Disable hardware acceleration
pyglet.options['debug_gl'] = False  # Disable OpenGL debugging

# Load the image and convert it to grayscale
image_path = 'data/leg.png'  # Replace with your image path
human_img = Image.open(image_path).convert('L')  # Convert to grayscale

# Convert to numpy array and normalize
image_array = np.asarray(human_img) / 255.0

# Invert intensities for the stippling effect (darker areas have more dots)
intensity = image_array

# Prepare a list to hold the dots
dots = []

# Define stippling parameters
num_dots = 10000  # total number of dots to be distributed
max_dot_size = 2.5  # maximum size of dot

# Generate dots
for _ in range(num_dots):
    x = random.randint(0, intensity.shape[1] - 1)
    y = random.randint(0, intensity.shape[0] - 1)

    # Use the inverted intensity as a probability weight for placing a dot
    if random.random() < intensity[y, x]:
        dot_size = intensity[y, x] * max_dot_size
        dots.append((x, y, dot_size))

# Create a Pyglet window
window = pyglet.window.Window()

# Load the background image
bg = pyglet.resource.image("data/smplx/test_2.jpg")

@window.event
def on_draw():
    window.clear()
    glEnable(GL_BLEND)

    glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA)
    bg.blit(0, 0)
    glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA)

    # Draw the dots
    glBegin(GL_QUADS)
    for x, y, size in dots:
        glColor4f(1.0, 1.0, 1.0, 1.0)  # Set color to white with full opacity
        glVertex2f(x - size / 2, y - size / 2)  # Bottom-left
        glVertex2f(x + size / 2, y - size / 2)  # Bottom-right
        glVertex2f(x + size / 2, y + size / 2)  # Top-right
        glVertex2f(x - size / 2, y + size / 2)  # Top-left
    glEnd()

pyglet.app.run()


AttributeError: module 'pyglet.gl' has no attribute 'glConfig'