In [115]:
import os
import numpy as np
import pandas as pd

# Définition des chemins des dossiers
txt_dir = '/Users/othmaneirhboula/Downloads/HumanML3D_Challenge/texts'
npy_dir = '/Users/othmaneirhboula/Downloads/HumanML3D_Challenge/motions'
train_file = '/Users/othmaneirhboula/Downloads/HumanML3D_Challenge/train.txt'  # Chemin du fichier train.txt

# Charger les noms des fichiers autorisés depuis train.txt
with open(train_file, 'r', encoding="utf-8") as f:
    valid_files = set(f.read().splitlines())

data_ = []

# Parcourir les fichiers autorisés
for base_name in valid_files:
    npy_file = os.path.join(npy_dir, base_name + '.npy')
    txt_file = os.path.join(txt_dir, base_name + '.txt')

    # Vérifier si les fichiers existent avant de les charger
    if os.path.exists(npy_file) and os.path.exists(txt_file):
        with open(txt_file, 'r', encoding="utf-8") as file:
            txt_content = file.read().split('\n')  # Lire le fichier texte ligne par ligne

        npy_content = np.load(npy_file, allow_pickle=True)  # Charger le fichier .npy
        data_.append([npy_content, txt_content])

# Création du DataFrame
df = pd.DataFrame(data_, columns=["motion", "text"])

print(f"Chargé {len(df)} paires de fichiers .npy et .txt.")

# Trouver la taille maximale des vecteurs pour uniformiser (padding)
max_len = max(arr.shape[0] for arr in df["motion"])

# Fonction de padding pour uniformiser les dimensions
def pad_array(arr, max_len):
    if arr.shape[0] < max_len:
        return np.pad(arr, (0, max_len - arr.shape[0]), mode='constant')
    return arr[:max_len]  # Troncature si nécessaire

# Appliquer le padding et transformer en matrice
X = np.vstack(df["motion"].apply(lambda x: pad_array(x.flatten(), max_len)).values)


Chargé 23384 paires de fichiers .npy et .txt.


In [105]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [190]:
X.shape

(23384, 469)

In [183]:
from sklearn.neighbors import NearestNeighbors

knn = NearestNeighbors(n_neighbors=2, metric='euclidean', algorithm='brute')
knn.fit(X)
def closest_id_neighbor(id):
    testdata = np.load('/Users/othmaneirhboula/Downloads/HumanML3D_Challenge/motions/'+id+'.npy')

    # Appliquer flatten() et pad_array() sans utiliser apply()
    data_padded = pad_array(testdata.flatten(), max_len)
    distances, indices = knn.kneighbors([data_padded])

    # Trouver l'indice du voisin le plus proche
    nearest_index = indices[0][0]

    # Récupérer les textes associés
    # Lire le texte depuis le fichier .txt
    nearest_text = str(df.iloc[nearest_index]["text"])
    sentence1 = nearest_text.split("#")[0][2:]
    sentence2 = nearest_text.split("#")[3][7:]
    # sentence3 = nearest_text.split("#")[6][7:]
    nearest_text = max([sentence1, sentence2], key=len)

    # Afficher les résultats
    return id, nearest_text


In [1]:
import numpy as np
from nltk.translate.bleu_score import sentence_bleu
def score(gt_texts, generated_text) -> float:
    """Calculate BLEU score"""

    # Get 3 ground truth references
    refs = [d.split(' ') for d in gt_texts]
    # Get single submission candidate
    gen = generated_text.split(' ')
    # Calculate blue score
    bleu_score = sentence_bleu(refs, gen)

    return bleu_score

## Usage Example

## Usage Example
gt_texts = ["abcd efgh ijkl mnop", "qrst uvwx  yzab", "cdef ghijk lmnop qrst"
           ]
generated_text = "abcd uvwx lmnop efgh"

score(gt_texts, generated_text)

The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


1.821831989445342e-231

In [184]:
print(closest_id_neighbor('M014346'))


('M014346', 'a person lowers their arms, jumps, and then walks forward.')


In [185]:
test_files = pd.read_csv('test.txt' , header=None,dtype=str)
print(test_files)
data_sub = []
for i in range (len(test_files)):
    data_sub.append(closest_id_neighbor(test_files.iloc[i,0]))

          0
0    004822
1    014457
2    009613
3    008463
4    014160
..      ...
984  003942
985  002315
986  005531
987  004043
988  009373

[989 rows x 1 columns]


In [159]:
print(data_sub)

[('004822', 'a person is walking in place.'), ('014457', 'the person swings a golf club.'), ('009613', 'someone takes several steps back, starts running and gains momentum to jump'), ('008463', 'the person is standing facing forward taking  a step to the left.'), ('014160', 'the person is touching head one hand after another.'), ('002530', 'a person walks forward starting with their left foot taking four steps.'), ('004945', 'a person is squatting and looking around and reaching with their right hand.'), ('001969', 'a person walks around to the right.'), ('005799', 'a person is walking and seem to be drunk'), ('000749', 'a person moves something from their left to right.'), ('006658', 'a person walks backwards in a straight line then stops.'), ('004124', 'the person walks forward slowly and then up some stairs.'), ('004965', 'a person takes 5 side steps left then 3 side steps back right.'), ('012805', 'a person leaping into a roll similar to a somersault, then taking off quickly after 

In [186]:
data_sub = pd.DataFrame(data_sub, columns=['id', 'text'])

In [161]:
data_sub.to_csv('submission.csv', index=False)

In [187]:
data_sub.to_csv('test3.csv', index=False)

In [1]:
print('test')

test


In [6]:
import numpy as np
import keras_nlp
from nltk.tokenize import word_tokenize
import os
from scipy.interpolate import interp1d
from nltk.translate.bleu_score import sentence_bleu
import re
import tensorflow as tf
from tensorflow.keras.models import *
from tensorflow.keras.layers import Input, LSTM, Dense, Embedding, Dropout, MultiHeadAttention, LayerNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.losses import sparse_categorical_crossentropy
from tensorflow.keras import backend as K


In [9]:
# 1. Encodeur pour les mouvements (LSTM)
movement_input = Input(shape=(timesteps_movement, input_dim_movement))  # (None, 100, 66)
x = LSTM(256, return_sequences=True)(movement_input)
x = Dropout(0.3)(x)
x = LSTM(256, return_sequences=False)(x)
movement_latent = Dense(latent_dim, activation='relu')(x)


NameError: name 'timesteps_movement' is not defined

In [8]:
model = load_model('movement_to_text_model_bleu.h5')

