In [None]:
# Mount Google Drive so the '/content/drive/...' paths used later in this notebook resolve.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Install the libraries this notebook depends on (versions are not pinned).
!pip install --upgrade transformers
!pip install torch
!pip install datasets
!pip install webdataset
!pip install pytorch_lightning
!pip install git+https://github.com/openai/CLIP.git
# Download the "linearMSE" checkpoint file from the LMOps promptist/aesthetic folder.
# NOTE(review): presumably these are the weights loaded by simple_inference_custom -- confirm.
!wget https://github.com/microsoft/LMOps/raw/main/promptist/aesthetic/sac%2Blogos%2Bava1-l14-linearMSE.pth
!pip install openai
!pip install hpsv2

In [None]:
import sys
# Extend the import path with the shared-drive folder that is expected to contain
# simple_inference_custom (requires Google Drive to be mounted first).
sys.path.append('/content/drive/Shared drives/capstone_rlt2i/codes/reward_predictor/aesthetic_scores/')
# Custom module; its predict_aesthetic_score function is imported further below.
import simple_inference_custom

In [None]:
# Extra dependencies: diffusers (StableDiffusionPipeline) and seaborn (plotting).
! pip install diffusers
! pip install seaborn

In [None]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM
import random
# simple_inference_custom is our customized script for calculating the aesthetic score
from simple_inference_custom import predict_aesthetic_score
import os
import hpsv2
from sklearn.preprocessing import MinMaxScaler
from transformers import CLIPProcessor, CLIPModel
from datasets import load_dataset
import numpy as np
import openai
import json
import os
from diffusers import StableDiffusionPipeline
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

In [None]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
# Load the OpenJourney Stable Diffusion pipeline in float32 and place it on `device`.
t2i_pipe = StableDiffusionPipeline.from_pretrained(
    "prompthero/openjourney",
    torch_dtype=torch.float32,
).to(device)

In [None]:
# Load the fine-tuning dataset from the shared drive (requires Google Drive to be mounted).
df = pd.read_csv('/content/drive/Shared drives/capstone_rlt2i/Momo/finetuning-dataset.csv')

In [None]:
# Preview the first rows to check which columns the dataset provides.
df.head()

Unnamed: 0,Base_prompt,Prompt
0,realistic car 3 d,realistic car 3 d render sci - fi car and sci ...
1,a comic potrait of a female necromamcer,a comic potrait of a female necromamcer with b...
2,steampunk market,"steampunk market interior, colorful, 3 d scene..."
3,“ A portrait of a cyborg,"“A portrait of a cyborg in a golden suit, D&D ..."
4,A full portrait of a beautiful post,A full portrait of a beautiful post apocalypti...


In [None]:
# Initialise the language model used for prompt enrichment.
import getpass

model_name = "meta-llama/Llama-2-7b-chat-hf"

# SECURITY: the Hugging Face access token is read from the HF_TOKEN environment
# variable, or typed in interactively when that variable is not set. Never commit
# a literal token to the notebook; a token that has been committed must be
# revoked and replaced.
access_token = os.environ.get("HF_TOKEN") or getpass.getpass("Hugging Face access token: ")

tokenizer = AutoTokenizer.from_pretrained(model_name, token=access_token)
model = AutoModelForCausalLM.from_pretrained(model_name, token=access_token)

# Set the model to evaluation mode (inference only, no training-time behaviour)
model.eval()

In [None]:
# Few-shot demonstrations placed in front of every enrichment request.
# Each (base prompt, enriched prompt) pair becomes one BOS ... EOS delimited example;
# the examples are separated by a newline.
_few_shot_pairs = [
    (
        "a green hair guy",
        "a highly detailed portrait of a man with dark green hair and green glowing eyes, high detail clothing, concept art, anime, artstation, professional.",
    ),
    (
        "animal crossing werewolf",
        "a cute chibi werewolf animal crossing villager. animal crossing character. 3 d render, 3 d model, simplified, animal crossing new horizons, hq, arstation.",
    ),
    (
        "an android woman",
        "portrait of a beautiful android woman, futuristic, chrome and colorful, photo realistic, ray tracing, 3 d shading, octane render.",
    ),
]

few_shot_prompts = "\n".join(
    tokenizer.bos_token
    + "Base prompt: " + base
    + "\nEnriched prompt for text-to-image generation: " + enriched
    + tokenizer.eos_token
    for base, enriched in _few_shot_pairs
)

# Encode once so the token ids can be reused for every request; [0] drops the
# leading batch axis added by return_tensors='pt'.
few_shot_prompt_encoded = tokenizer.encode(few_shot_prompts, return_tensors='pt')[0]

In [None]:
# use openjourney text to image model
def text_to_image_generation(prompts):
    """Run the text-to-image pipeline on `prompts` and return its `.images` result."""
    return t2i_pipe(prompts).images

def prompt_enrichment(prompt, temperature=0.5, desired_length=300):
    """Expand a short base prompt into an enriched text-to-image prompt.

    The pre-encoded few-shot examples are followed by the new base prompt, the
    language model samples a continuation, and only the first line of that
    continuation is kept.

    Args:
        prompt: Base prompt to enrich.
        temperature: Sampling temperature passed to ``model.generate``.
        desired_length: Passed to ``model.generate`` as ``max_length``; note that
            this bounds the prompt tokens and the generated tokens together.

    Returns:
        The enriched prompt with surrounding whitespace removed. The string is
        empty when the model produces no usable text on the first line.
    """
    curr_prompt = "Base prompt: " + prompt + "\nEnriched prompt for text-to-image generation: "

    # Tokenize the new request and append it to the few-shot examples.
    curr_prompt_encoded = tokenizer.encode("\n" + tokenizer.bos_token + curr_prompt, return_tensors='pt')[0]
    input_ids = torch.cat((few_shot_prompt_encoded, curr_prompt_encoded), dim=0)
    prompt_length = input_ids.shape[0]

    # Sample a continuation; the ids are moved to the model's device so the call
    # also works when the model has been placed on a GPU.
    output = model.generate(
        input_ids.unsqueeze(0).to(model.device),
        max_length=desired_length,
        num_return_sequences=1,
        bos_token_id=tokenizer.bos_token_id,
        eos_token_id=tokenizer.eos_token_id,
        temperature=temperature,
        do_sample=True,
    )

    # Decode only the newly generated tokens. Searching the full decoded text for
    # `curr_prompt` is fragile: it matches the few-shot answer when the base prompt
    # repeats a few-shot example, and finds nothing when decoding does not
    # reproduce the prompt text exactly.
    new_tokens = output[0][prompt_length:]
    generated_text = tokenizer.decode(new_tokens, skip_special_tokens=True)

    # Keep the first line only, and drop anything from a following "Base prompt" on.
    generated_text = generated_text.split("\n", 1)[0]
    generated_text = generated_text.split("Base prompt", 1)[0]

    return generated_text.strip()

# updated version: weighted sum of the aesthetic and HPS scores (defaults: 0.2 aesthetic, 0.8 HPS)
def compute_combined_score(text, image, w_aesthetic=0.2, w_hps=0.8):
    """Blend the aesthetic score and the HPS v2 score of one image.

    Args:
        text: Prompt passed to ``hpsv2.score`` together with the image.
        image: Image handed to both scorers.
        w_aesthetic: Weight of the aesthetic score after it is divided by 10.
        w_hps: Weight of the first value returned by ``hpsv2.score``.

    Returns:
        A scalar tensor on ``device`` created with ``requires_grad=True``, or
        ``None`` when either scorer raises (the error is printed).
    """
    try:
        raw_aesthetic = predict_aesthetic_score(image)
        hps_scores = hpsv2.score(image, text)
    except Exception as err:
        print(f"Error processing image: {str(err)}")
        return None

    # Rescale the aesthetic score by 1/10.
    # NOTE(review): assumes the predictor scores on a 0-10 scale -- confirm.
    aesthetic_unit = raw_aesthetic.cpu().item() / 10.0

    # The first HPS value is used as-is; no further transformation is applied here.
    # NOTE(review): assumes HPS scores already lie in [0, 1] -- confirm.
    hps_value = hps_scores[0]

    weighted_sum = w_aesthetic * aesthetic_unit + w_hps * hps_value
    return torch.tensor(weighted_sum, requires_grad=True).to(device)

In [None]:
def visualize_results(results_df):
    """Draw a box plot of the combined score for each sampling temperature.

    Args:
        results_df: DataFrame with 'Temperature' and 'Combined Score' columns,
            one row per scored generation.

    Returns:
        None. The figure is displayed with ``plt.show()``; when ``results_df`` is
        ``None`` or has no rows a message is printed and nothing is plotted.
    """
    # Every generation may fail to score, which leaves nothing to plot.
    if results_df is None or results_df.empty:
        print("No results to visualize.")
        return

    # Set the aesthetic style of the plots
    sns.set_style("whitegrid")

    # x = temperature, y = distribution of the combined score at that temperature
    plt.figure(figsize=(10, 6))
    sns.boxplot(x='Temperature', y='Combined Score', data=results_df, palette='viridis')
    plt.title('Effect of Temperature on Combined Score')
    plt.xlabel('Temperature')
    plt.ylabel('Combined Score')
    plt.show()

def generate_results(df_data_samples, temperature_range, model_base=None):
    """Score enriched prompts at several sampling temperatures.

    For every base prompt and every temperature: enrich the prompt, render one
    image from the enriched prompt, and compute the combined score. Generations
    whose score is ``None`` are skipped.

    Args:
        df_data_samples: DataFrame holding the base prompts in a 'Base_prompt'
            column (the lowercase spelling 'base_prompt' is accepted as well).
        temperature_range: Iterable of temperatures forwarded to
            ``prompt_enrichment``.
        model_base: Not used by this function; kept so that existing
            three-argument calls keep working.

    Returns:
        DataFrame with the columns 'Temperature' and 'Combined Score', one row
        per scored generation.

    Raises:
        KeyError: If a non-empty ``df_data_samples`` has no base-prompt column.
    """
    result_columns = ['Temperature', 'Combined Score']
    records = []

    if len(df_data_samples) == 0:
        return pd.DataFrame(records, columns=result_columns)

    # The dataset column is spelled 'Base_prompt'; looking up 'base_prompt' alone
    # raised KeyError on the dataset loaded in this notebook.
    prompt_column = next(
        (name for name in ('Base_prompt', 'base_prompt') if name in df_data_samples.columns),
        None,
    )
    if prompt_column is None:
        raise KeyError("df_data_samples has no 'Base_prompt' column")

    # Materialise once so a one-shot iterator is not exhausted after the first prompt.
    temperatures = list(temperature_range)

    for prompt in df_data_samples[prompt_column]:
        for temp in temperatures:
            enriched_prompt = prompt_enrichment(prompt, temperature=temp)
            generated_image = text_to_image_generation([enriched_prompt])[0]
            combined_score = compute_combined_score(enriched_prompt, generated_image)
            if combined_score is None:
                continue
            # Convert to a Python float once and reuse it for the record and the log line.
            score_value = combined_score.cpu().item()
            records.append({'Temperature': temp, 'Combined Score': score_value})
            print(f"Temperature: {temp}, Combined Score: {score_value}")

    return pd.DataFrame(records, columns=result_columns)

In [None]:
# Sampling temperatures to compare; each one is tried for every sampled base prompt.
temperature_range = [0.2, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]

In [None]:
# Draw a random subset of prompts for the experiment. The seed makes the subset
# reproducible across kernel restarts, and the size is capped at the number of
# available rows because DataFrame.sample raises when n exceeds it.
df_samples = df.sample(n=min(1000, len(df)), random_state=42)

In [None]:
# Generate results.
# `model_base` is never defined in this notebook, so passing it raised NameError on
# a fresh kernel. generate_results does not read its third argument, so the loaded
# language model is passed to keep the call shape unchanged.
results_df = generate_results(df_samples, temperature_range, model)

# Visualize the temperature effect
visualize_results(results_df)