In [1]:
class ClothingEnvironment:
    """Toy environment that rewards choosing a shirt matching the current pant.

    States are indices into ``pants`` and actions are indices into ``shirts``.
    A shirt "matches" a pant when the shirt description is a substring of the
    pant description.

    Args:
        pants: non-empty sequence of pant description strings.
        shirts: non-empty sequence of shirt description strings.
        termination_prob: probability that any given ``step`` ends the
            episode (defaults to the previously hard-coded 0.2).

    Raises:
        ValueError: if ``pants`` or ``shirts`` is empty.
    """

    def __init__(self, pants, shirts, termination_prob=0.2):
        # Fail early with a readable message instead of an opaque numpy error
        # raised from inside reset().
        if len(pants) == 0 or len(shirts) == 0:
            raise ValueError("pants and shirts must each contain at least one description")
        self.pants = pants
        self.shirts = shirts
        self.num_pants = len(pants)
        self.num_shirts = len(shirts)
        self.termination_prob = termination_prob
        self.reset()

    def reset(self):
        """Draw a random current pant, pick a reference shirt, and zero the reward."""
        self.current_pant_idx = np.random.randint(self.num_pants)
        self.current_pant_desc = self.pants[self.current_pant_idx]

        # Prefer a shirt whose description occurs inside the pant description;
        # fall back to any shirt when none matches.
        matching_shirts = [s for s in self.shirts if s in self.current_pant_desc]
        if matching_shirts:
            self.current_shirt_desc = np.random.choice(matching_shirts)
        else:
            self.current_shirt_desc = np.random.choice(self.shirts)

        self.reward = 0

    def step(self, action):
        """Score the chosen shirt against the current pant and advance.

        Args:
            action: index into ``self.shirts`` of the shirt to recommend.

        Returns:
            A tuple ``(observation, reward, done)``:
            - observation: the next pant description, or a zero-like dummy
              value when the episode is over.
            - reward: ``1`` if the chosen shirt description is contained in
              the pant description that was current when the action was
              taken, otherwise ``-1``. It is numeric on every step, including
              the terminal one.
            - done: ``True`` when the episode has ended.
        """
        chosen_shirt_desc = self.shirts[action]
        self.reward = 1 if chosen_shirt_desc in self.current_pant_desc else -1

        # Move on to a freshly drawn pant regardless of whether the episode ends.
        self.current_pant_idx = np.random.randint(self.num_pants)
        self.current_pant_desc = self.pants[self.current_pant_idx]

        done = bool(np.random.rand() < self.termination_prob)
        if done:
            observation = np.zeros_like(self.current_pant_desc)
        else:
            observation = self.current_pant_desc
        return observation, self.reward, done

    def render(self, chosen_shirt_desc):
        """Print the current pant, the given shirt description and the last reward."""
        print(f"Current Pant Description: {self.current_pant_desc}")
        print(f"Chosen Shirt Description: {chosen_shirt_desc}")
        print(f"Reward: {self.reward}")




In [2]:
import numpy as np

class QLearningAgent:
    """Tabular Q-learning agent with epsilon-greedy exploration.

    The Q-table has one row per pant index (state) and one column per shirt
    index (action), sized from ``env.num_pants`` and ``env.num_shirts``.

    Args:
        env: environment exposing ``num_pants``, ``num_shirts``,
            ``current_pant_idx`` and ``step(action)``.
        learning_rate: step size applied to each temporal-difference update.
        discount_factor: weight given to the best next-state value.
        exploration_rate: probability of taking a random action. Values >= 1
            mean every action is random; with the defaults (2.0 decayed by
            0.99 per episode) that lasts for roughly the first 69 episodes.
        exploration_decay_rate: factor the exploration rate is multiplied by
            after every episode.
    """

    def __init__(self, env, learning_rate=0.003, discount_factor=0.89, exploration_rate=2.0, exploration_decay_rate=0.99):
        self.env = env
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.exploration_rate = exploration_rate
        self.exploration_decay_rate = exploration_decay_rate
        self.q_table = np.zeros((env.num_pants, env.num_shirts))

    def choose_action(self, state):
        """Return a shirt index for ``state`` using an epsilon-greedy policy."""
        if np.random.uniform() < self.exploration_rate:
            # Explore: take a random action
            return np.random.randint(self.env.num_shirts)
        # Exploit: choose the action with the highest Q-value
        return np.argmax(self.q_table[state, :])

    def update_q_table(self, state, action, reward, next_state, done=False):
        """Apply one Q-learning update to ``q_table[state, action]``.

        Args:
            state: pant index the action was taken in.
            action: shirt index that was chosen.
            reward: numeric reward received for the action.
            next_state: pant index observed after the action.
            done: when ``True`` the transition ended the episode, so no
                future value is bootstrapped from ``next_state``.
        """
        current_q = self.q_table[state, action]
        # A terminal transition has no successor value to bootstrap from.
        next_q = 0.0 if done else np.max(self.q_table[next_state, :])
        td_target = reward + self.discount_factor * next_q
        self.q_table[state, action] = current_q + self.learning_rate * (td_target - current_q)

    def run_episode(self):
        """Interact with the environment until it reports ``done``, then decay exploration.

        The episode starts from whatever pant the environment currently
        holds; the environment is not reset here.
        """
        state = self.env.current_pant_idx
        done = False
        while not done:
            action = self.choose_action(state)
            _observation, reward, done = self.env.step(action)
            next_state = self.env.current_pant_idx
            self.update_q_table(state, action, reward, next_state, done)
            state = next_state
        self.exploration_rate *= self.exploration_decay_rate



In [4]:
# Train an agent on a small hand-written set of pant and shirt descriptions.
env = ClothingEnvironment(
    pants=['blue jeans', 'black pants', 'khaki shorts'],
    shirts=['red shirt', 'blue shirt', 'white shirt'],
)
agent = QLearningAgent(env)

# Run 1000 training episodes.
for episode in range(1000):
    agent.run_episode()


In [3]:
import pandas as pd
# Read the dataset and expose its two description columns as plain Python lists.
df = pd.read_csv('dataset.csv')
p = list(df['pant_description'])
s = list(df['shirt_description'])

In [None]:
import random

# Initialize the environment from the description lists loaded from the dataset
pants = p
shirts = s
env = ClothingEnvironment(pants, shirts)

# Initialize the Q-learning agent
agent = QLearningAgent(env)

# Train the agent by running multiple episodes
for i in range(1000):
    agent.run_episode()

# Interactive loop: show a few pants, recommend a shirt for the chosen one,
# and fold the user's like/dislike back into the Q-table.
cc = True
while cc:
    # Sample from the actual list instead of a hard-coded index range, so the
    # cell works for any dataset size and never offers the same row twice.
    op = random.sample(pants, min(4, len(pants)))
    for number, description in enumerate(op, start=1):
        print(f"{number}.", description)

    choice = input('Choose from the pants above (1-4), or q to quit:').strip().lower()
    if choice == 'q':
        # Explicit way out of the loop.
        cc = False
        continue
    try:
        c = int(choice)
    except ValueError:
        c = 0
    if not 1 <= c <= len(op):
        # Reject non-numeric and out-of-range input instead of crashing or
        # silently indexing from the end of the list.
        print(f"Please enter a number between 1 and {len(op)}.")
        continue

    # Make a prediction for the chosen pant description
    cp = op[c - 1]
    state = pants.index(cp)
    action = agent.choose_action(state)
    predicted_shirt = shirts[action]
    print('Pant:', cp)
    print('Chosen shirt:', predicted_shirt)

    feedback = input("Did you like the recommendation? (y/n): ")
    reward = 1 if feedback.strip().lower() == "y" else -1

    # Update the Q-table with the user's feedback.
    # NOTE(review): next_state is whatever pant the environment last drew
    # during training, not something derived from the user's choice —
    # confirm this is the intended bootstrap state.
    next_state = env.current_pant_idx
    agent.update_q_table(state, action, reward, next_state)

    # Update the exploration rate
    agent.exploration_rate *= agent.exploration_decay_rate


1. White denim shorts
2. Olive green chino pants
3. Tapered navy blue cargo pants
4. Light blue chino shorts
Choose from the pants above (1-4):1
Pant: White denim shorts
Chosen shirt: Plain white t-shirt
Did you like the recommendation? (y/n): n
1. Skinny-fit dark wash jeans
2. Slim-fit beige chinos
3. Slim-fit khaki chinos
4. Classic navy blue dress pants
Choose from the pants above (1-4):2
Pant: Slim-fit beige chinos
Chosen shirt: Light blue button-up shirt
Did you like the recommendation? (y/n): y
1. Classic gray dress pants
2. Loose-fit black cargo pants
3. Relaxed-fit light wash denim shorts
4. Loose-fit beige cargo pants
