In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json

In [2]:
# Location of the logged queries; despite the .txt extension the content is parsed as JSON.
path = "../programs/games/queries.txt"

# JSON is UTF-8 by specification and the query terms contain accented characters,
# so decode explicitly instead of relying on the platform's default encoding.
with open(path, "r", encoding="utf-8") as file:
    json_data = json.load(file)

# Preview the first logged query to see the record layout.
json_data[0]

{'current_time': '2025-03-13 01:31:48',
 'soft_part': [{'type': 'TERM', 'term': 'Un homme est un chien', 'weight': 1},
  {'type': 'PRECOMPUTED', 'weight': 1, 'recordID': 6381}],
 'hard_part': []}

In [3]:
def does_contain_only_like(query):
    """Tell whether a query combines at least one liked image with another soft part.

    A soft part is treated as an image reference when its ``type`` is
    ``"PRECOMPUTED"`` and it carries a ``recordID``; every other soft part
    counts as an "other" part.

    Args:
        query: dict with a ``"hard_part"`` sequence and a ``"soft_part"``
            sequence of dicts.

    Returns:
        bool: ``True`` only if ``hard_part`` is empty, every image reference
        has a strictly positive weight, at least one image reference exists
        and at least one other soft part exists. ``False`` otherwise.
    """
    if len(query["hard_part"]) > 0:
        return False

    has_like = False
    has_other_part = False
    for part in query["soft_part"]:
        is_image_ref = (
            part.get("type") == "PRECOMPUTED" and part.get("recordID") is not None
        )
        if not is_image_ref:
            has_other_part = True
            continue

        weight = part.get("weight")
        # A missing weight cannot be confirmed as a like; comparing None with a
        # number would raise TypeError, so reject the query the same way as a
        # dislike (queries with dislikes are not taken into account for now).
        if weight is None or not weight > 0.0:
            return False
        has_like = True

    return has_like and has_other_part

# Keep the queries accepted by the like filter, then show how many remain next to a sample.
queries_with_like = list(filter(does_contain_only_like, json_data))
(len(queries_with_like), queries_with_like[0])

(26,
 {'current_time': '2025-03-13 01:31:48',
  'soft_part': [{'type': 'TERM', 'term': 'Un homme est un chien', 'weight': 1},
   {'type': 'PRECOMPUTED', 'weight': 1, 'recordID': 6381}],
  'hard_part': []})

In [4]:
def format_query(query):
    """Split a query's soft part into liked record IDs, weights and textual values.

    The query is assumed to contain a like (callers filter beforehand).

    Args:
        query: dict with a ``"soft_part"`` sequence of dicts.

    Returns:
        tuple: ``(liked_recordIDs, weights, terms)`` where
        ``liked_recordIDs`` lists the ``recordID`` of every ``"PRECOMPUTED"``
        part with a strictly positive weight, and ``weights`` / ``terms`` are
        parallel lists holding the weight and value of every ``"TERM"``,
        ``"KEYWORD"``, ``"COLOR"`` and ``"LUMINOSITY"`` part, in query order.
        Any other part is ignored.

    Raises:
        KeyError: if a supported part lacks its value field (e.g. a ``"TERM"``
            part without ``"term"``).
    """
    # Field holding the value for each supported part type.
    value_field_by_type = {
        "TERM": "term",
        "KEYWORD": "keyword",
        "COLOR": "color",
        "LUMINOSITY": "luminosity",
    }

    weights = []
    terms = []
    liked_recordIDs = []

    for part in query["soft_part"]:
        part_type = part.get("type")
        weight = part.get("weight")
        record_id = part.get("recordID")

        if part_type == "PRECOMPUTED":
            # Guard against a missing weight: comparing None with a number
            # would raise TypeError. Such a part is not counted as a like.
            if weight is not None and weight > 0.0 and record_id is not None:
                liked_recordIDs.append(record_id)
            # PRECOMPUTED parts never contribute to weights/terms.
            continue

        value_field = value_field_by_type.get(part_type)
        if value_field is not None:
            weights.append(weight)
            terms.append(part[value_field])

    return liked_recordIDs, weights, terms

# Run the formatter on the first retained query and display its three components.
(liked_recordIDs, weights, terms) = format_query(queries_with_like[0])
(liked_recordIDs, weights, terms)

([6381], [1], ['Un homme est un chien'])

In [5]:
# Collect one (recordID, weights, terms) row per liked image, then build the frame in one go:
# growing a DataFrame row by row through .loc re-allocates on every insert.
training_rows = []
for query in queries_with_like:
    liked_recordIDs, weights, terms = format_query(query)
    # Weights and terms are stored as "|"-separated strings; str() keeps the join safe
    # if a value (e.g. a colour or luminosity) is not already a string.
    fWeights = "|".join(str(w) for w in weights)
    fTerms = "|".join(str(t) for t in terms)
    for recordID in liked_recordIDs:
        training_rows.append((recordID, fWeights, fTerms))

# Remove duplicates if any (the surviving rows keep their original index labels).
extra_training_data = pd.DataFrame(
    training_rows, columns=["recordID", "weights_list", "terms_list"]
).drop_duplicates()
extra_training_data

Unnamed: 0,recordID,weights_list,terms_list
0,6381,1,Un homme est un chien
1,2106,1,Une femme avec un chapeau rouge
2,854,1,Une maison avec une cheminée
3,5048,1|1.25,Une femme|Un jouet
4,10357,1,Deux hommes qui se tiennent debout et qui rega...
5,11348,1,Un homme avec des yeux bleus
6,1665,1.75|1|1,Fête|Nuages|Crépuscule
7,206,1|1.5,Fenêtre|Couronne
8,8266,1|1,Forêt|Aube
9,552,1,Une montagne avec un sapin et une rivière


In [6]:
# Hard-coded integer sequence; every value is between 0 and 3.
# NOTE(review): nothing visible here shows where these numbers come from or how they are used —
# presumably a patience counter copied from a training log; confirm and record the source.
patiences = [0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 3, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 0, 0, 1, 2, 0, 1, 2, 0, 
1, 2, 3, 0, 1, 0, 0, 1, 2, 0, 1, 0, 1, 2, 0, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 3, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 3, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 
0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 0, 1, 2, 0, 1, 0, 1, 2, 0, 1, 2, 3, 0, 0, 1, 2, 0, 0, 1, 0, 1, 2, 0, 1, 2]