In [9]:
# One-time environment setup: uncomment to install the pinned SDK versions.
# %pip install google-cloud-aiplatform==1.25.0
# %pip install google-api-core==1.33.1

In [10]:
import vertexai
from vertexai.preview.language_models import TextGenerationModel

from google.cloud import storage
import pandas as pd

In [11]:
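# Run once to create Application Default Credentials (ADC); the gcloud path below is machine-specific.
# ! /Users/scottsmacbook/google-cloud-sdk/bin/gcloud auth application-default login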

In [12]:
from google.cloud import storage


def authenticate_implicit_with_adc(project_id="your-google-cloud-project-id"):
    """List the Cloud Storage buckets of a project using Application Default Credentials.

    No credentials are passed to the client constructor, so the client library
    is left to discover them on its own (ADC).

    Prerequisites:
        1. Set up ADC as described in
           https://cloud.google.com/docs/authentication/external/set-up-adc
        2. Pass your own project id instead of the placeholder default.
        3. The user account or service account in use needs the
           "storage.buckets.list" permission.

    Args:
        project_id: The project id of your Google Cloud project.

    Returns:
        None. The bucket names are printed to stdout, framed by a header
        line and a closing line.
    """
    # Swap in whichever client your application needs; the point of this
    # sample is only that no explicit credentials are supplied here.
    gcs = storage.Client(project=project_id)
    bucket_listing = gcs.list_buckets()

    print("Buckets:")
    for name in (entry.name for entry in bucket_listing):
        print(name)
    print("Listed all storage buckets.")

In [13]:
# Smoke-test ADC by listing the buckets of the capstone project.
authenticate_implicit_with_adc(project_id='msca310019-capstone-49b3')

Buckets:
user-scripts-msca310019-capstone-49b3
Listed all storage buckets.


In [14]:
def predict_large_language_model_sample(
    project_id: str,
    model_name: str,
    temperature: float,
    max_decode_steps: int,
    top_p: float,
    top_k: int,
    content: str,
    location: str = "us-central1",
    tuned_model_name: str = "",
):
    """Send one prompt to a Vertex AI text generation model and return its text.

    Args:
        project_id: Google Cloud project passed to ``vertexai.init``.
        model_name: Identifier handed to ``TextGenerationModel.from_pretrained``.
        temperature: Forwarded to ``predict`` as ``temperature``.
        max_decode_steps: Forwarded to ``predict`` as ``max_output_tokens``.
        top_p: Forwarded to ``predict`` as ``top_p``.
        top_k: Forwarded to ``predict`` as ``top_k``.
        content: The full prompt text sent to the model.
        location: Region passed to ``vertexai.init``.
        tuned_model_name: When non-empty, the model is swapped for the result
            of ``get_tuned_model(tuned_model_name)`` before predicting.

    Returns:
        The ``text`` attribute of the prediction response.
    """
    vertexai.init(project=project_id, location=location)

    base_model = TextGenerationModel.from_pretrained(model_name)
    active_model = (
        base_model.get_tuned_model(tuned_model_name) if tuned_model_name else base_model
    )

    sampling_options = {
        "temperature": temperature,
        "max_output_tokens": max_decode_steps,
        "top_k": top_k,
        "top_p": top_p,
    }
    return active_model.predict(content, **sampling_options).text

In [15]:
text = '''An important paradigm of natural language processing consists of large-scale pre-training on general domain data and adaptation to particular tasks or domains. As we pre-train larger models, full fine-tuning, which retrains all model parameters, becomes less feasible. Using GPT-3 175B as an example -- deploying independent instances of fine-tuned models, each with 175B parameters, is prohibitively expensive. We propose Low-Rank Adaptation, or LoRA, which freezes the pre-trained model weights and injects trainable rank decomposition matrices into each layer of the Transformer architecture, greatly reducing the number of trainable parameters for downstream tasks. Compared to GPT-3 175B fine-tuned with Adam, LoRA can reduce the number of trainable parameters by 10,000 times and the GPU memory requirement by 3 times. LoRA performs on-par or better than fine-tuning in model quality on RoBERTa, DeBERTa, GPT-2, and GPT-3, despite having fewer trainable parameters, a higher training throughput, and, unlike adapters, no additional inference latency. We also provide an empirical investigation into rank-deficiency in language model adaptation, which sheds light on the efficacy of LoRA. We release a package that facilitates the integration of LoRA with PyTorch models and provide our implementations and model checkpoints for RoBERTa, DeBERTa, and GPT-2'''

In [16]:
print(text)

An important paradigm of natural language processing consists of large-scale pre-training on general domain data and adaptation to particular tasks or domains. As we pre-train larger models, full fine-tuning, which retrains all model parameters, becomes less feasible. Using GPT-3 175B as an example -- deploying independent instances of fine-tuned models, each with 175B parameters, is prohibitively expensive. We propose Low-Rank Adaptation, or LoRA, which freezes the pre-trained model weights and injects trainable rank decomposition matrices into each layer of the Transformer architecture, greatly reducing the number of trainable parameters for downstream tasks. Compared to GPT-3 175B fine-tuned with Adam, LoRA can reduce the number of trainable parameters by 10,000 times and the GPU memory requirement by 3 times. LoRA performs on-par or better than fine-tuning in model quality on RoBERTa, DeBERTa, GPT-2, and GPT-3, despite having fewer trainable parameters, a higher training throughput

In [17]:
# Instruction prefix for the article-simplification prompt (target reader: 5 year old).
prompt = "Rewrite the following article so it can be understood by 5 year old:"

In [18]:
# Send the current `prompt` instruction and the article, joined by a single
# space, as one request; the returned text is shown as the cell output.
predict_large_language_model_sample(
    content=prompt + ' ' + text,
    project_id="msca310019-capstone-49b3",
    location="us-central1",
    model_name="text-bison@001",
    max_decode_steps=768,
    temperature=0.2,
    top_k=40,
    top_p=0.8,
)

'. Computers are getting better at understanding language. They can do this by learning from a lot of text. But when they learn from a lot of text, they get really big. This can make it hard to use them for some things.\n\nWe have a new way to make computers understand language that is smaller and faster. It works by using a special kind of math. We call it Low-Rank Adaptation, or LoRA.\n\nLoRA works by taking the big computer model and making it smaller. It does this by only using the parts of the model that are important for the task the computer is trying to do. This makes the computer faster and uses less memory.\n\nLoRA is as good as the old way of making computers understand language, but it is smaller and faster. We hope that LoRA will help computers do more things with language.'

In [19]:
# Instruction prefix for the article-simplification prompt (target reader: 10 year old).
prompt = "Rewrite the following article so it can be understood by 10 year old:"

In [20]:
# Send the current `prompt` instruction and the article, joined by a single
# space, as one request; the returned text is shown as the cell output.
predict_large_language_model_sample(
    content=prompt + ' ' + text,
    project_id="msca310019-capstone-49b3",
    location="us-central1",
    model_name="text-bison@001",
    max_decode_steps=256,
    temperature=0.2,
    top_k=40,
    top_p=0.95,
)

''

In [21]:
# Instruction prefix for the article-simplification prompt (target reader: 15 year old).
prompt = "Rewrite the following article so it can be understood by 15 year old:"

In [22]:
# Send the current `prompt` instruction and the article, joined by a single
# space, as one request; the returned text is shown as the cell output.
predict_large_language_model_sample(
    content=prompt + ' ' + text,
    project_id="msca310019-capstone-49b3",
    location="us-central1",
    model_name="text-bison@001",
    max_decode_steps=256,
    temperature=0.2,
    top_k=40,
    top_p=0.95,
)

'. One way to make computers understand human language is to teach them a lot about the world. We do this by feeding them a lot of text and then asking them questions about it. This process is called "pre-training". Once a computer has been pre-trained, we can then "fine-tune" it to do a specific task, like writing different kinds of creative text.\n\nThe problem is that pre-training computers to do this is very expensive. It requires a lot of computing power and time. So we need to find ways to make it cheaper.\n\nOne way to do this is to use a technique called "low-rank adaptation". This means that we only train a small part of the computer\'s language model. This makes it much cheaper to fine-tune, but it doesn\'t seem to affect the quality of the results.\n\nWe\'ve released a package that makes it easy to use low-rank adaptation with PyTorch models. We\'ve also trained some models using this technique and we\'re releasing them so that other people can use them.'

In [23]:
# Instruction prefix placed in front of an incoming email when asking for a short reply.
prompt_email = "Create a brief response to the following email in a professional manner :"




In [24]:
# Short sample email used as model input (text reproduced verbatim, including its original spelling).
text_email = "Hey, This week is probably not a good week, but lets definately shoot for early next week."

In [25]:
# Ask the model to answer the sample email: the instruction and the email are
# joined by a single space; the returned text is shown as the cell output.
predict_large_language_model_sample(
    content=prompt_email + ' ' + text_email,
    project_id="msca310019-capstone-49b3",
    location="us-central1",
    model_name="text-bison@001",
    max_decode_steps=256,
    temperature=0.2,
    top_k=40,
    top_p=0.95,
)

''

In [4]:
# Instruction prefix placed in front of an incoming email (note the trailing space inside the literal).
prompt_email = "Create a response to the following email in a professional manner : "

In [5]:
# Sample incoming email used as model input.
# Built from adjacent string literals so that the cell's line breaks and
# source-code indentation do not leak into the prompt: the wording is unchanged,
# sentences are separated by single spaces, and there is no trailing whitespace.
text_email = (
    "Hey Mike! Hope you had a great thanksgiving weekend. "
    "I used it to check out NYC! Anyway, "
    "what's your take on the future of the firm? "
    "And how are the employees reacting so far? "
    "It's difficult to recall a weekday when the WSJ does not carry an Enron related article. "
    "And now it seems the merger is doubtful."
)

In [6]:
# Ask the model to answer the sample email: the instruction and the email are
# joined by a single space; the returned text is shown as the cell output.
predict_large_language_model_sample(
    content=prompt_email + ' ' + text_email,
    project_id="msca310019-capstone-49b3",
    location="us-central1",
    model_name="text-bison@001",
    max_decode_steps=256,
    temperature=0.2,
    top_k=40,
    top_p=0.95,
)

NameError: name 'predict_large_language_model_sample' is not defined

In [7]:
import datetime
import pytz

# Current time in the 'US/Central' zone, formatted like "Thu, 18 May 2023 12:03:32".
datetime.datetime.now(pytz.timezone('US/Central')).strftime("%a, %d %B %Y %H:%M:%S")

'Thu, 18 May 2023 12:03:32'

In [8]:
# Open the bucket that holds the notebook's data files.
# The project is passed explicitly, matching the earlier bucket-listing call,
# so the client does not depend on a default project being configured in the
# local environment. Credentials are still resolved through ADC.
client = storage.Client(project='msca310019-capstone-49b3')
bucket = client.get_bucket('user-scripts-msca310019-capstone-49b3')

NameError: name 'storage' is not defined

In [None]:
# Fetch the cleaned message/reply CSV from the bucket as raw bytes.
# `download_as_bytes` replaces the deprecated `download_as_string` alias;
# the value handed to the next cell is still a `bytes` object.
blob = bucket.blob('data/data_message_reply_pairs_cleaned.csv')
content = blob.download_as_bytes()

In [None]:
import io
# Wrap the downloaded `content` in an in-memory binary buffer so pandas can parse it as CSV.
message_replies = pd.read_csv(io.BytesIO(content))

In [None]:
# Local-file variant of the load above (machine-specific absolute path; kept for reference only):
#message_replies = pd.read_csv("/Users/scottsmacbook/capstone/00_Data/message_reply_pairs_cleaned.csv")

In [None]:
# Preview the first rows of the loaded frame.
message_replies.head()

In [None]:
# Stringify every message, split it on whitespace, and keep the tokens in a new column.
message_replies['message_list'] = message_replies['message'].map(lambda raw: str(raw).split())

In [None]:
# Keep only the rows whose message has more than three whitespace-separated tokens.
message_replies_trimmed = message_replies.loc[message_replies['message_list'].map(len).gt(3)]

In [None]:
# Row count after the token-length filter.
len(message_replies_trimmed)

In [None]:
# Row count of the full frame, for comparison with the filtered count.
len(message_replies)

In [None]:
# Ask the model for a reply to every message that passed the length filter.
# Each reply is echoed as it arrives and collected in the same order as `messages`.
messages = list(message_replies_trimmed['message'])
generated_response = []

for message in messages:
    generated = predict_large_language_model_sample(
        content=prompt_email + ' ' + message,
        project_id="msca310019-capstone-49b3",
        location="us-central1",
        model_name="text-bison@001",
        max_decode_steps=256,
        temperature=0.2,
        top_k=40,
        top_p=0.95,
    )
    print(generated)
    generated_response.append(generated)

In [None]:
# Number of replies collected by the generation loop.
len(generated_response)

In [None]:
# Spot-check one input message (the third entry of `messages`).
messages[2]