In [1]:
# import sys
# !echo "Installation in progress..."
# !conda install -y --quiet  --prefix {sys.prefix} -c conda-forge \
#     #accelerate==0.23.0 \
#     #validators==0.22.0 \
#     #diffusers==0.18.2 \
#     #transformers==4.32.1 \
#     pillow \
#     PyPDF2 \
#     numpy \
#     openai \
#     ipython > /dev/null && echo "Installation successful" || echo "Installation failed"

!{sys.executable} -m !pip install invisible-watermark --user > /dev/null 2>&1
!{sys.executable} -m !pip install transformers huggingface-hub --user > /dev/null 2>&1
!{sys.executable} -m !pip install PyPDF2 --user > /dev/null 2>&1
!{sys.executable} -m !pip install numpy --user > /dev/null 2>&1
#!{sys.executable} -m !pip install openai==1.12.0 --user > /dev/null 2>&1
!{sys.executable} -m !pip install typing-extensions==3.10.0.2 --user > /dev/null 2>&1
!echo "Installation complete..."

Installation complete...


In [2]:
!pip install --force-reinstall typing-extensions==4.7 openai==1.12

Defaulting to user installation because normal site-packages is not writeable
Collecting typing-extensions==4.7
  Using cached typing_extensions-4.7.0-py3-none-any.whl (33 kB)
Collecting openai==1.12
  Using cached openai-1.12.0-py3-none-any.whl (226 kB)
Collecting anyio<5,>=3.5.0 (from openai==1.12)
  Using cached anyio-4.2.0-py3-none-any.whl (85 kB)
Collecting distro<2,>=1.7.0 (from openai==1.12)
  Using cached distro-1.9.0-py3-none-any.whl (20 kB)
Collecting httpx<1,>=0.23.0 (from openai==1.12)
  Using cached httpx-0.26.0-py3-none-any.whl (75 kB)
Collecting pydantic<3,>=1.9.0 (from openai==1.12)
  Using cached pydantic-2.6.1-py3-none-any.whl (394 kB)
Collecting sniffio (from openai==1.12)
  Using cached sniffio-1.3.0-py3-none-any.whl (10 kB)
Collecting tqdm>4 (from openai==1.12)
  Using cached tqdm-4.66.2-py3-none-any.whl (78 kB)
Collecting idna>=2.8 (from anyio<5,>=3.5.0->openai==1.12)
  Using cached idna-3.6-py3-none-any.whl (61 kB)
Collecting exceptiongroup>=1.0.2 (from anyio<5,>

In [3]:
import openai

In [4]:
import os
import random
import time
import numpy as np

import sys

from io import BytesIO
import os
import random
import time
import warnings
from pathlib import Path
from typing import List, Dict, Tuple

# Suppress warnings for a cleaner output.
warnings.filterwarnings("ignore")

import requests
import torch
import torch.nn as nn
import intel_extension_for_pytorch as ipex  # Used for optimizing PyTorch models
from PIL import Image

from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler

from PyPDF2 import PdfReader
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
#import openai

In [5]:
# The text-to-image model wrapper class

class Text2ImgModel:
    """
    Text2ImgModel generates images from text prompts using a pretrained
    diffusers pipeline.

    Attributes:
    - device: The device the pipeline is moved to. Defaults to "cpu".
    - pipeline: The loaded (and, if requested, ipex-optimized) model pipeline.
    - data_type: The torch dtype used to load the model and for autocast.
    """

    def __init__(
            self,
            model_id_or_path: str,
            device: str = "cpu",
            torch_dtype: torch.dtype = torch.bfloat16,
            optimize: bool = True,
            enable_scheduler: bool = False,
            warmup: bool = False,
    ) -> None:
        """
        The initializer for Text2ImgModel class.

        Parameters:
        - model_id_or_path: The identifier or path of the pretrained model.
        - device: The device to run the model on. Default is "cpu".
        - torch_dtype: The data type to use in the model. Default is torch.bfloat16.
        - optimize: Whether to optimize the model with ipex after loading. Default is True.
        - enable_scheduler: Whether to replace the pipeline's scheduler with a
          DPMSolverMultistepScheduler built from its config. Default is False.
        - warmup: Whether to generate one sample image right after loading.
          Default is False.
        """

        self.device = device
        self.pipeline = self._load_pipeline(
            model_id_or_path, torch_dtype, enable_scheduler
        )
        self.data_type = torch_dtype
        if optimize:
            self.optimize_pipeline()
        if warmup:
            self.warmup_model()

    def _load_pipeline(
            self,
            model_id_or_path: str,
            torch_dtype: torch.dtype,
            enable_scheduler: bool,
    ) -> DiffusionPipeline:
        """
        Loads the pretrained model and prepares it for inference.

        A local copy under "Big_Data/GenAI/<model_id_or_path>" is used when it
        exists; otherwise the identifier is handed to diffusers as-is and a
        best-effort attempt is made to save the loaded model to that local path.

        Parameters:
        - model_id_or_path: The identifier or path of the pretrained model.
        - torch_dtype: The data type to use in the model.
        - enable_scheduler: Whether to switch to DPMSolverMultistepScheduler.

        Returns:
        - pipeline: The loaded model pipeline, moved to self.device.
        """

        print("Loading the model...")
        model_path = Path(f"Big_Data/GenAI/{model_id_or_path}")
        # Evaluate once: the answer decides both where to load from and
        # whether the model still has to be saved locally afterwards.
        has_local_copy = model_path.exists()

        if has_local_copy:
            load_path = model_path
        else:
            print("Using the default path for models...")
            load_path = model_id_or_path

        pipe = DiffusionPipeline.from_pretrained(
            load_path,
            torch_dtype=torch_dtype,
            use_safetensors=True,
            variant="fp16",
        )
        if enable_scheduler:
            pipe.scheduler = DPMSolverMultistepScheduler.from_config(
                pipe.scheduler.config
            )
        if not has_local_copy:
            # Saving is only a convenience for later runs, so a failure here
            # is reported but must not prevent the model from being used.
            try:
                print(f"Attempting to save the model to {model_path}...")
                pipe.save_pretrained(f"{model_path}")
                print("Model saved.")
            except Exception as e:
                print(f"An error occurred while saving the model: {e}. Proceeding without saving.")
        return pipe.to(self.device)

    def _optimize_pipeline(self, pipeline: DiffusionPipeline) -> DiffusionPipeline:
        """
        Optimizes the model for inference using ipex.

        Every attribute of the pipeline that is an nn.Module is put in eval
        mode and replaced by its ipex-optimized version, using the text
        encoder's dtype.

        Parameters:
        - pipeline: The model pipeline to be optimized.

        Returns:
        - pipeline: The optimized model pipeline.
        """

        target_dtype = pipeline.text_encoder.dtype
        for attr in dir(pipeline):
            component = getattr(pipeline, attr)
            if isinstance(component, nn.Module):
                setattr(
                    pipeline,
                    attr,
                    ipex.optimize(component.eval(), dtype=target_dtype, inplace=True),
                )
        return pipeline

    def warmup_model(self):
        """
        Warms up the model by generating a sample image into the ".tmp" directory.
        """
        print("Setting up model...")
        start_time = time.time()
        self.generate_images(
            prompt="A beautiful sunset over the mountains",
            num_images=1,
            save_path=".tmp",
        )
        print(
            "Model is set up and ready! Warm-up completed in {:.2f} seconds.".format(
                time.time() - start_time
            )
        )

    def optimize_pipeline(self) -> None:
        """
        Optimizes the current model pipeline.
        """

        self.pipeline = self._optimize_pipeline(self.pipeline)

    @staticmethod
    def _safe_filename_stem(prompt: str) -> str:
        """
        Builds the file-name stem for a prompt: its first three words joined
        by "_", with path separators and other characters that are illegal in
        file names (and control characters) replaced by "_".
        """
        unsafe_chars = '<>:"/\\|?*'
        stem = "_".join(prompt.split()[:3])
        return "".join(
            "_" if (ch in unsafe_chars or ord(ch) < 32) else ch for ch in stem
        )

    def generate_images(
            self,
            prompt: str,
            num_inference_steps: int = 50,
            num_images: int = 5,
            save_path: str = "output",
    ) -> List[Image.Image]:
        """
        Generates images based on the given prompt and saves them to disk.

        Each image is written to
        "<save_path>/<first three prompt words>_<index>_<prompt checksum>.png".

        Parameters:
        - prompt: The text prompt to generate images from.
        - num_inference_steps: Number of noise removal steps.
        - num_images: The number of images to generate. Default is 5.
        - save_path: The directory to save the generated images in. Default is "output".

        Returns:
        - images: A list of the generated images.

        Raises:
        - OSError: If the output directory cannot be created.
        """

        # Create the output directory once, up front. exist_ok avoids the
        # check-then-create race of a separate os.path.exists() test.
        try:
            os.makedirs(save_path, exist_ok=True)
        except OSError as e:
            print("Failed to create directory", save_path, "due to", str(e))
            raise

        # Autocast is only useful when running in a reduced-precision dtype.
        use_autocast = self.data_type != torch.float32
        # Prompts are free text (e.g. LLM output) and may contain "/" or other
        # characters that would break the output path, so sanitize the stem.
        file_stem = self._safe_filename_stem(prompt)
        prompt_checksum = sum(ord(c) for c in prompt) % 10000

        images = []
        for i in range(num_images):
            with torch.cpu.amp.autocast(enabled=use_autocast, dtype=self.data_type):
                image = self.pipeline(
                    prompt=prompt,
                    num_inference_steps=num_inference_steps,
                ).images[0]
            output_image_path = os.path.join(
                save_path, f"{file_stem}_{i}_{prompt_checksum}.png"
            )
            image.save(output_image_path)
            images.append(image)
        return images

In [6]:
### prompt_to_image

# Cache of loaded models keyed by (model_id, device), so repeated calls to
# prompt_to_image() do not reload the same model.
model_cache = {}

def prompt_to_image(prompt_arr, num_images=1, enhance=False, output_dir="./output"):
    """
    Generate images for every prompt with every configured model.

    Parameters:
    - prompt_arr: A list of prompt strings (a single string is treated as one
      prompt). None, an empty list, and blank prompts are ignored.
    - num_images: Number of images to generate per prompt and model. Default is 1.
    - enhance: Whether to append enhancement keywords to each prompt. Default is False.
    - output_dir: Directory the images are saved in. Default is "./output".

    Returns:
    - None. Images are written to output_dir by the model.
    """
    model_ids = [
        "stabilityai/stable-diffusion-2-1",
        "CompVis/stable-diffusion-v1-4",
    ]
    enhancements = [
        "historical"
    ]
    device = "cpu"  # Changed from xpu

    # A bare string would otherwise be iterated character by character.
    if isinstance(prompt_arr, str):
        prompt_arr = [prompt_arr]
    prompts = [p.strip() for p in (prompt_arr or []) if p and p.strip()]
    if not prompts:
        # Nothing to render: return before paying the cost of loading models.
        return

    for model_id in model_ids:
        model_key = (model_id, device)
        if model_key not in model_cache:
            model_cache[model_key] = Text2ImgModel(model_id, device=device)
        model = model_cache[model_key]

        for prompt_text in prompts:
            if enhance:
                # Never request more samples than there are enhancements,
                # otherwise random.sample raises ValueError.
                extras = random.sample(enhancements, min(5, len(enhancements)))
                prompt_text = prompt_text + " " + " ".join(extras)
                print(f"Using enhanced prompt: {prompt_text}")

            start_time = time.time()
            # Handle failures per prompt so one bad prompt does not skip the
            # remaining prompts for this model.
            try:
                model.generate_images(
                    prompt_text,
                    num_images=num_images,
                    save_path=output_dir,
                )
            except Exception as e:
                print(f"An error occurred: {e}")
                continue
            print(f"Complete generating {num_images} images in '{output_dir}' in {time.time() - start_time:.2f} seconds.")

In [None]:
def split_string(input):
    '''
    Function: Split a block of text into sentence-like fragments on '.'.

    The piece after the last '.' is always dropped: it is empty when the text
    ends with a period, and otherwise it is an unterminated trailing fragment.
    Each remaining fragment is stripped of surrounding whitespace, and
    fragments that are empty after stripping are discarded.

    :param input: the text to split (None or an empty string yields [])
    :return: array of strings
    '''
    if not input:
        return []

    special_char = '.'

    fragments = input.split(special_char)[:-1]
    return [fragment.strip() for fragment in fragments if fragment.strip()]


def pdf_gen(pdf_path='ap-psych.pdf', starting_page_no=87, ending_page_no=89):
    """
    Extract text from a page range of a PDF, summarize it, ask the OpenAI chat
    API for a catchy rewrite, and generate images from the resulting sentences.

    Parameters:
    - pdf_path: Path of the PDF file to read. Default is 'ap-psych.pdf'.
    - starting_page_no: Start of the page range. Default is 87.
    - ending_page_no: End of the page range. Default is 89.
      The pages read are the zero-based indices
      range(starting_page_no - 1, ending_page_no - 1), exactly as before.
      NOTE(review): this excludes ending_page_no itself when the numbers are
      1-based; confirm whether the end page is meant to be inclusive.

    Raises:
    - RuntimeError: If the OPENAI_API_KEY environment variable is not set.
    - ValueError: If the page range is empty or outside the document.
    """
    # The API key must never be committed to the notebook; read it from the
    # environment. Any key that was previously hard-coded here should be
    # treated as leaked and revoked.
    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        raise RuntimeError(
            "Set the OPENAI_API_KEY environment variable before calling pdf_gen()."
        )
    openai.api_key = api_key

    # creating a pdf reader object
    reader = PdfReader(pdf_path)

    first_index = starting_page_no - 1
    stop_index = ending_page_no - 1
    page_count = len(reader.pages)
    if not (0 <= first_index < stop_index <= page_count):
        raise ValueError(
            f"Invalid page range {starting_page_no}-{ending_page_no} "
            f"for a document with {page_count} pages."
        )

    # extracting text from each page in the range
    textOutput = "".join(
        reader.pages[page_index].extract_text()
        for page_index in range(first_index, stop_index)
    )

    tokenizer = AutoTokenizer.from_pretrained("lidiya/bart-large-xsum-samsum", use_fast=False)
    model = AutoModelForSeq2SeqLM.from_pretrained("lidiya/bart-large-xsum-samsum")
    summarizer = pipeline("summarization", model=model, tokenizer=tokenizer)

    # truncation=True keeps long extracts within the model's maximum input
    # length instead of failing inside the model.
    summarizedTextOutput = summarizer(textOutput, truncation=True)
    summary_text = summarizedTextOutput[0]["summary_text"]

    # Make the summarized content catchy
    response = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {
                "role": "system",
                "content": "don't use emojis but make the content sound intriguing with an attention catching opening in 33 words. add a relevant factual information that might help users understand the concept better."
            },
            {
                "role": "user",
                "content": summary_text
            }
        ],
        temperature=0.7,
        max_tokens=64,
        top_p=1
    )
    catchy_text = response.choices[0].message.content

    print("\nOriginal Textbook Content:", textOutput)
    print("========================================================")
    print("\nSummarized Content:", summary_text)
    print("========================================================")
    print("\nChatGPT's catchy response:", catchy_text)

    str_arr = split_string(catchy_text)

    print(str_arr)

    # Call the prompt_to_image()
    prompt_to_image(str_arr)


# Entry point: run the full PDF -> summary -> images flow when this cell or
# script is executed directly.
if __name__ == "__main__":
    pdf_gen()

2024-02-10 21:45:25,324 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"



Original Textbook Content: behaviorism
the view that psychology (1) should be an objective science that (2) studies
behavior without reference to mental processes. Most psychologists today agree
with (1) but not with (2).
Freudian (Psychoanalytic) Psychology
The other major force was 
Sigmund Freud
’s psychoanalytic psychology,
which emphasized the ways our unconscious mind and childhood
experiences affect our behavior. (In later modules, we’ll look more closely
at Freud’s teachings, including his theory of personality and his views on
unconscious sexual conflicts and the mind’s defenses against its own
wishes and impulses.)
John B. Watson (1878–1958) and Rosalie Rayner (1898–1935)
Working with Rayner, Watson championed psychology as the scientific study of
behavior. In a controversial study on a baby who became famous as “Little
Albert,” he and Rayner showed that fear could be learned. (More about this in
Module 26
.)
87B. F. Skinner (1904–1990)
 
This leading behaviorist rejected in

  0%|          | 0/50 [00:00<?, ?it/s]