In [None]:
from google.colab import userdata
import os

try:
    hf_token = userdata.get('HF_TOKEN')
    # Never echo the token itself: cell output is stored with the notebook and
    # would expose the credential to anyone the notebook is shared with.
    print("HF_TOKEN loaded from Colab secrets.")
except Exception as e:
    print(f"Error fetching user data: {e}")
    print("Checking if running in Colab...")
    if 'COLAB_GPU' in os.environ:
        print("Running in Colab. Check if the notebook is running interactively.")
    else:
        print("Not running in Colab. User data functions are not available.")
    # Fall back to the environment so `hf_token` is always bound after this
    # cell (it is None when the variable is not set either).
    hf_token = os.environ.get('HF_TOKEN')

# If not running in Colab or userdata fetch failed:
#   - Provide an alternative way to retrieve the token
#   - For instance, load from a local file or environment variable
# Example:
# hf_token = os.environ.get('HF_TOKEN')  # Get token from environment variable

This cell retrieves and displays current usage statistics for CPU, memory, and disk in the Colab environment; GPU status is checked separately in the following cell. The code was generated from the prompt shown in the cell's first comment.

In [None]:
# prompt: WRITE CODE TO get the usage of all the system resources that I am using

from google.colab import userdata
import os
import psutil

try:
    hf_token = userdata.get('HF_TOKEN')
    # Never echo the token itself: cell output is stored with the notebook and
    # would expose the credential to anyone the notebook is shared with.
    print("HF_TOKEN loaded from Colab secrets.")
except Exception as e:
    print(f"Error fetching user data: {e}")
    print("Checking if running in Colab...")
    if 'COLAB_GPU' in os.environ:
        print("Running in Colab. Check if the notebook is running interactively.")
    else:
        print("Not running in Colab. User data functions are not available.")
    # Fall back to the environment so `hf_token` is always bound after this
    # cell (it is None when the variable is not set either).
    hf_token = os.environ.get('HF_TOKEN')

# Get system resource usage
def get_system_resource_usage():
    """Print CPU, memory and root-filesystem usage as percentages.

    The call blocks for about one second because CPU utilisation is sampled
    over a one-second interval. Nothing is returned; the figures are only
    printed.
    """
    cpu_percent = psutil.cpu_percent(interval=1)
    memory_percent = psutil.virtual_memory().percent
    disk_percent = psutil.disk_usage('/').percent

    report = (
        f"CPU Usage: {cpu_percent}%",
        f"Memory Usage: {memory_percent}%",
        f"Disk Usage: {disk_percent}%",
    )
    for line in report:
        print(line)


# Print the current usage figures when this cell runs.
get_system_resource_usage()


In [None]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
    print('Not connected to a GPU')
else:
    print(gpu_info)

In [None]:
!pip install -q diffusers transformers accelerate bitsandbytes

In [None]:
from huggingface_hub import login
from google.colab import userdata


# Read the Hugging Face token from the Colab secret store and log in with it;
# `add_to_git_credential=True` also stores the token for git operations.
hf_token = userdata.get('HF_TOKEN')
login(token=hf_token, add_to_git_credential=True)

In [None]:
!pip install datasets

In [None]:
import torch
from google.colab import userdata
from huggingface_hub import login
from transformers import pipeline
from diffusers import DiffusionPipeline
from datasets import load_dataset
import soundfile as sf
from IPython.display import Audio

In this section, we perform sentiment analysis using a pretrained transformer model via Hugging Face's pipeline. The code initializes a sentiment-analysis pipeline on the GPU and applies it to an example sentence, outputting whether the sentiment is positive or negative along with confidence scores.

In [None]:
# Sentiment Analysis
# No model is named, so the pipeline uses the default model for this task.

classifier = pipeline(task="sentiment-analysis", device="cuda")
sentence = "I'm  super excited to be on the way to LLM mastery!"
result = classifier(sentence)
print(result)

This section sets up a named entity recognition (NER) pipeline using a pretrained transformer model. It groups recognized entities and processes a sample sentence to extract and display entities such as names, locations, and organizations.

In [None]:
# Named Entity Recognition

# `aggregation_strategy="simple"` groups word pieces into whole entities; it is
# the supported replacement for the deprecated `grouped_entities=True` flag.
ner = pipeline("ner", aggregation_strategy="simple", device="cuda")
result = ner("PremKumar Kora is a Data Scientist from India")
print(result)

Here, we use a question-answering pipeline by providing both a question and context. The code loads the model on the GPU, runs inference to find the answer span within the provided context, and outputs the answer with a confidence score.

In [None]:
# Question Answering with Context
# The pipeline looks for the answer to `question` inside the `context` string.

question_answerer = pipeline(task="question-answering", device="cuda")
qa_inputs = {
    "question": "Who was the 44th president of the United States?",
    "context": "Barack Obama was the 44th president of the United States.",
}
result = question_answerer(**qa_inputs)
print(result)

This section demonstrates text summarization using a transformer-based summarization pipeline. It processes a longer text input and outputs a concise summary that captures the key points.

In [None]:
# Text Summarization

summarizer = pipeline(task="summarization", device="cuda")
text = """The Hugging Face transformers library is an incredibly versatile and powerful tool for natural language processing (NLP).
It allows users to perform a wide range of tasks such as text classification, named entity recognition, and question answering, among others.
It's an extremely popular library that's widely used by the open-source data science community.
It lowers the barrier to entry into the field by providing Data Scientists with a productive, convenient way to work with transformer models.
"""

# Length bounds for the summary, with sampling switched off.
summary_options = dict(max_length=50, min_length=25, do_sample=False)
summary = summarizer(text, **summary_options)

first_summary = summary[0]
print(first_summary["summary_text"])

In this cell, we perform machine translation using a pretrained translation pipeline. The code translates input text from one language to another and prints the translated output.

In [None]:
# Translation (English to French, using the task's default model)

translator = pipeline(task="translation_en_to_fr", device="cuda")
english_text = "The Data Scientists were truly amazed by the power and simplicity of the HuggingFace pipeline API."
result = translator(english_text)

first_translation = result[0]
print(first_translation["translation_text"])

This example illustrates how to specify a particular translation model by name when creating the pipeline. It translates text using the explicitly defined model, showing how to override the default.

In [None]:
# Another translation (English to Spanish), this time naming the model explicitly.
# All translation models are here: https://huggingface.co/models?pipeline_tag=translation&sort=trending

translator = pipeline(
    task="translation_en_to_es",
    model="Helsinki-NLP/opus-mt-en-es",
    device="cuda",
)
english_text = "The Data Scientists were truly amazed by the power and simplicity of the HuggingFace pipeline API."
result = translator(english_text)

first_translation = result[0]
print(first_translation["translation_text"])

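This section showcases zero-shot classification, which goes beyond sentiment analysis by scoring the input text against candidate labels supplied at inference time. The code loads a zero-shot classification pipeline, applies it to input text with a list of candidate labels, and prints the labels along with their scores.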

In [None]:
# Classification (zero-shot: the labels are supplied at call time)

classifier = pipeline(task="zero-shot-classification", device="cuda")
candidate_labels = ["technology", "sports", "politics"]
result = classifier(
    "Hugging Face's Transformers library is amazing!",
    candidate_labels=candidate_labels,
)
print(result)

Here, we generate new text using a language generation pipeline. The cell loads a text-generation model, provides a prompt, and outputs generated continuations or completions.

In [None]:
# Text Generation
# The model continues the opening sentence given below.

generator = pipeline(task="text-generation", device="cuda")
opening = "If there's one thing I want you to remember about using HuggingFace pipelines, it's"
result = generator(opening)

first_completion = result[0]
print(first_completion["generated_text"])

This part of the notebook uses a diffusers image generation pipeline (Stable Diffusion 2, loaded in half precision on the GPU) to create an image from a text prompt. The code loads the model, generates an image for the prompt, and displays it as the cell output.

In [None]:
# Image Generation

# Load the fp16 safetensors weights, then move the pipeline to the GPU.
image_gen = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-2",
    torch_dtype=torch.float16,
    use_safetensors=True,
    variant="fp16",
)
image_gen = image_gen.to("cuda")

text = "give image of Madurai Meenachi Temple in sketch style"
generation = image_gen(prompt=text)
image = generation.images[0]

# Leaving the image as the last expression renders it as the cell output.
image

In this section, we use a text-to-speech pipeline (Microsoft SpeechT5) to synthesize speech from text. The code loads the model, selects a speaker embedding from the CMU ARCTIC x-vectors dataset, generates the audio, saves it to `speech.wav`, and plays it back in the notebook.

In [None]:
# Audio Generation

synthesiser = pipeline(task="text-to-speech", model="microsoft/speecht5_tts", device='cuda')

# Take one x-vector from the dataset as the speaker embedding and give it a
# leading dimension before passing it to the model.
embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
xvector = embeddings_dataset[7306]["xvector"]
speaker_embedding = torch.tensor(xvector).unsqueeze(0)

speech = synthesiser(
    "Hi, I would like to introduce PremKumar Kora, A renowned Data Scientist",
    forward_params={"speaker_embeddings": speaker_embedding},
)

# Write the waveform to disk at the rate reported by the pipeline, then play it.
sf.write("speech.wav", speech["audio"], samplerate=speech["sampling_rate"])
Audio("speech.wav")

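This cell generates music from a text prompt with the MusicLDM model, which was trained on 466 hours of music data. The comments at the top of the cell give background on the training data and augmentation strategy; the code then loads the pipeline, generates a 15-second clip, saves it as a `.wav` file, and plays it back.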

In [None]:
# MusicLDM is trained on a corpus of 466 hours of music data.
# Beat-synchronous data augmentation strategies are applied to the music samples, both in the
# time domain and in the latent space. Using beat-synchronous data augmentation strategies
# encourages the model to interpolate between the training samples, but stay within the domain
# of the training data. The result is generated music that is more diverse while staying faithful
# to the corresponding style.

from diffusers import MusicLDMPipeline
import torch
import scipy

# Import the submodule explicitly: a bare `import scipy` is not guaranteed to
# expose `scipy.io.wavfile` on every SciPy version.
import scipy.io.wavfile

# Load the MusicLDM weights in half precision and move the pipeline to the GPU.
repo_id = "ucsd-reach/musicldm"
pipe = MusicLDMPipeline.from_pretrained(repo_id, torch_dtype=torch.float16)
pipe = pipe.to("cuda")

# The prompt is fed to the model verbatim, so spelling matters.
prompt = "heavy drums mixed with guitar"
audio = pipe(prompt, num_inference_steps=10, audio_length_in_s=15.0).audios[0]

# save the audio sample as a .wav file
# NOTE(review): 16000 Hz is assumed to be the model's output rate — confirm
# against the MusicLDM documentation.
scipy.io.wavfile.write("flute.wav", rate=16000, data=audio)
Audio("flute.wav")