# Data Analysis and Preprocessing

In [4]:
import pandas as pd
from transformers import GPT2LMHeadModel, GPT2Tokenizer

data = pd.read_excel('AI_data.xlsx')
training_text = data['MECHANIC'] + ' ' + data['OBJECTIVE OF THE GAME'] + ' ' + data['USP'] + ' ' + data['RULES OF THE GAME']
training_text_list = training_text.tolist()

with open('training_text.txt', 'w') as file:
    file.write('\n'.join(training_text_list))

tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
tokenizer.pad_token = tokenizer.eos_token  # Set eos_token as the pad_token
input_ids = tokenizer(training_text_list, return_tensors='pt', padding=True, truncation=True)['input_ids']

model = GPT2LMHeadModel.from_pretrained('gpt2')

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


# Fine-Tuning GPT-2 Model

In [6]:
import pandas as pd
from transformers import GPT2LMHeadModel, GPT2Tokenizer, GPT2Config
from transformers import TextDataset, DataCollatorForLanguageModeling
from transformers import Trainer, TrainingArguments

data = pd.read_excel('AI_data.xlsx')
training_text = data['MECHANIC'] + ' ' + data['OBJECTIVE OF THE GAME'] + ' ' + data['USP'] + ' ' + data['RULES OF THE GAME']
training_text_list = training_text.tolist()

with open('training_text.txt', 'w') as file:
    file.write('\n'.join(training_text_list))

tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
tokenizer.pad_token = tokenizer.eos_token
input_ids = tokenizer(training_text_list, return_tensors='pt', padding=True, truncation=True)['input_ids']
config = GPT2Config.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2', config=config)

train_dataset = TextDataset(
    tokenizer=tokenizer,
    file_path='training_text.txt',
    block_size=128,
)
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False
)
training_args = TrainingArguments(
    output_dir="./gpt2-fine-tuned",
    overwrite_output_dir=True,
    num_train_epochs=3,
    per_device_train_batch_size=4,
    save_steps=10_000,
    save_total_limit=2,
)
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=train_dataset,
)
trainer.train()
model.save_pretrained("./gpt2-fine-tuned")



Step,Training Loss


# Image Generation (GAN)

In [7]:
import fitz
import io
from PIL import Image
import numpy as np

def load_pdf_images(pdf_path, target_shape=(64, 64, 3)):
    pdf_images = []

    pdf_document = fitz.open(pdf_path)

    for page_num in range(pdf_document.page_count):
        page = pdf_document[page_num]
        image_list = page.get_images(full=True)

        for img_index, img_info in enumerate(image_list):
            img_index = img_info[0]
            img_base = pdf_document.extract_image(img_index)
            img_bytes = img_base["image"]

            image = Image.open(io.BytesIO(img_bytes))
            preprocessed_image = image.resize(target_shape[:-1])  # Resize to target shape
            preprocessed_image = np.array(preprocessed_image) / 255.0  # Normalize
            pdf_images.append(preprocessed_image)

    return np.array(pdf_images)

In [9]:
import tensorflow as tf
from tensorflow.python.keras import layers
from PIL import Image
import numpy as np
import pandas as pd
import os
import io
import fitz

def load_pdf_images(pdf_path, target_shape=(64, 64, 3)):
    pdf_images = []

    pdf_document = fitz.open(pdf_path)

    for page_num in range(pdf_document.page_count):
        page = pdf_document[page_num]
        image_list = page.get_images(full=True)

        for img_index, img_info in enumerate(image_list):
            img_index = img_info[0]
            img_base = pdf_document.extract_image(img_index)
            img_bytes = img_base["image"]

            image = Image.open(io.BytesIO(img_bytes))
            preprocessed_image = image.resize(target_shape[:-1])  # Resize to target shape
            preprocessed_image = np.array(preprocessed_image) / 255.0  # Normalize
            pdf_images.append(preprocessed_image)

    return np.array(pdf_images)

def combine_data(excel_data, pdf_images):
    pdf_images_flat = pdf_images.reshape(pdf_images.shape[0], -1)
    combined_data = pd.concat([excel_data, pd.DataFrame(pdf_images_flat)], axis=1)
    return combined_data

latent_dim_gan = 100
img_shape_gan = (64, 64, 3)
channels_gan = 3
excel_path_gan = "/content/AI_data.xlsx"
pdf_path_gan = "/content/excel_task_game_images.pdf"
model_dir_gan = "/content/saved_model"

def load_excel_data(excel_path):
    excel_data = pd.read_excel(excel_path)
    return excel_data

excel_data_gan = load_excel_data(excel_path_gan)
pdf_images_gan = load_pdf_images(pdf_path_gan, target_shape=img_shape_gan)
combined_data_gan = combine_data(excel_data_gan, pdf_images_gan)
pdf_images_gan = pdf_images_gan[:430272]
pdf_images_gan = pdf_images_gan.reshape(-1, 64, 64, 3)

def preprocess_image(image):
    image = image.resize((64, 64))
    image = np.array(image) / 255.0
    return image

def build_generator(latent_dim, channels):
    model = tf.keras.Sequential()
    model.add(layers.Dense(7 * 7 * 128, input_dim=latent_dim))
    model.add(layers.LeakyReLU(alpha=0.2))
    model.add(layers.Reshape((7, 7, 128)))
    model.add(layers.Conv2DTranspose(128, (4, 4), strides=(2, 2), padding='same'))
    model.add(layers.LeakyReLU(alpha=0.2))
    model.add(layers.Conv2DTranspose(channels, (4, 4), activation='sigmoid', padding='same'))
    return model

def build_discriminator(img_shape):
    model = tf.keras.Sequential()
    model.add(layers.Conv2D(64, (3, 3), strides=(2, 2), padding='same', input_shape=img_shape))
    model.add(layers.LeakyReLU(alpha=0.2))
    model.add(layers.Conv2D(128, (3, 3), strides=(2, 2), padding='same'))
    model.add(layers.LeakyReLU(alpha=0.2))
    model.add(layers.Flatten())
    model.add(layers.Dense(1, activation='sigmoid'))
    return model

def build_gan(generator, discriminator):
    discriminator.trainable = False
    model = tf.keras.Sequential()
    model.add(generator)
    model.add(discriminator)
    model.compile(loss='binary_crossentropy', optimizer='adam')
    return model

def train_gan(generator, discriminator, gan, combined_data, epochs=10, batch_size=32):
    X = combined_data.iloc[:, 1:].to_numpy()
    y = combined_data.iloc


# Generate Ideas

In [10]:
generated_ideas = []
for _ in range(5):
    seed = training_text.sample().values[0]
    generated_text = model.generate(tokenizer(seed, return_tensors='pt')['input_ids'], max_length=200, num_return_sequences=1, no_repeat_ngram_size=2, top_k=50, top_p=0.95)
    generated_idea = tokenizer.decode(generated_text[0], skip_special_tokens=True)
    generated_ideas.append(generated_idea)

generated_ideas_df = pd.DataFrame({'Generated Ideas': generated_ideas})
generated_ideas_df.to_csv('generated_ideas.csv', index=False)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generati

# PDF with Game Ideas and Images

In [13]:
print(generated_ideas_df.columns)

Index(['Generated Ideas'], dtype='object')


In [19]:
from fpdf import FPDF
import fitz
import io
from PIL import Image
import numpy as np
import pandas as pd

def extract_images_from_pdf(pdf_path):
    pdf_images = []
    pdf_document = fitz.open(pdf_path)
    for page_num in range(pdf_document.page_count):
        page = pdf_document[page_num]
        image_list = page.get_images(full=True)

        for img_index, img_info in enumerate(image_list):
            img_index = img_info[0]
            img_base = pdf_document.extract_image(img_index)
            img_bytes = img_base["image"]

            image = Image.open(io.BytesIO(img_bytes))
            pdf_images.append(np.array(image))

    return pdf_images

def create_pdf_with_images(ideas, images, output_path="game_ideas.pdf"):
    pdf = FPDF()
    for i, (idea, img_array) in enumerate(zip(ideas, images), start=1):
        pdf.add_page()
        pdf.set_font("Arial", size=12)
        pdf.cell(200, 10, txt=f"Idea {i}:", ln=True, align='L')
        pdf.multi_cell(0, 10, txt=idea)
        pdf.ln(10)
        pdf.image(Image.fromarray((img_array * 255).astype(np.uint8)), x=None, y=None, w=150)
    pdf.output(output_path)

csv_path = "/content/generated_ideas.csv"
generated_ideas_df = pd.read_csv(csv_path)
generated_ideas = generated_ideas_df["Generated Ideas"].tolist()

pdf_path = "/content/excel_task_game_images.pdf"
extracted_images = extract_images_from_pdf(pdf_path)
create_pdf_with_images(generated_ideas, extracted_images)


  pdf.set_font("Arial", size=12)
  pdf.cell(200, 10, txt=f"Idea {i}:", ln=True, align='L')
  pdf.cell(200, 10, txt=f"Idea {i}:", ln=True, align='L')
  pdf.multi_cell(0, 10, txt=idea)
