In [None]:
from pathlib import Path
import llm
import json
from pydantic import BaseModel
from typing import Literal

## Load and Parse the Datafile

In [2]:
# Read the raw diary transcript into memory as one UTF-8 string.
data_file = Path("./data/Copy_of_2306_SDA_Story.txt")
# Stop right away, with a readable message, if the transcript is missing.
assert data_file.exists(), f"Data file {data_file} does not exist."
with data_file.open(encoding="utf-8") as transcript:
    data = transcript.read()

In [3]:
# Split the transcript into chunks. A line that begins with "06_SDA_" opens a
# new chunk; every other line is attached to the chunk currently being built.
lines = data.splitlines()

# Start with one empty group so text before the first header has a home.
grouped_lines = [[]]
for text_line in lines:
    if text_line.startswith("06_SDA_"):
        grouped_lines.append([text_line])
    else:
        grouped_lines[-1].append(text_line)

# Re-join each group, trim surrounding whitespace, and drop empty chunks.
chunks = []
for group in grouped_lines:
    joined = "\n".join(group).strip()
    if joined:
        chunks.append(joined)

In [7]:
# Sanity check on the split: report the chunk count, then show the first chunk.
chunk_count = len(chunks)
print(f"Number of chunks: {chunk_count}\n")
first_chunk = chunks[0]
print(f"First chunk:\n\n{first_chunk}")

Number of chunks: 16

First chunk:

06_SDA_06232023.m4a [Week 1] [00:00:00 - 00:01:05]
Speaker 1: During my time at [the program], I hope to learn how to work
in a company and also understand what it's like to work in a corporate
environment. I also want to learn as much as they can from my
instructors, which I've met here at [THE PROGRAM]. Personally, I want to
improve my skills in my profession of choice, which is design. I also
want to improve my social skills, which also branches towards like
professionalism. I've heard like a lot of times like this word in like
professional and like education environments where everyone says, Oh
yeah, you gotta you got to network with other people. like networking
for some reason is always like very important as I heard. and I hope I
can do that here at [the program]. maybe I hope that I can be
comfortable enough with myself and in the environment to talk with other
people and grow like, um, social wise.


## Run the Chunks through a Model

### Question: 

How does the sentiment of the participant’s diary entries change over the course of the 9 weeks?

In [None]:
# Structured-output schema passed to the model via `schema=` below.
# (Kept as comments rather than a class docstring: pydantic copies a class
# docstring into the generated JSON schema's description, which would change
# what is sent with each request.)
class Sentiment(BaseModel):
    # Restrict the value to "hopeful" or "pessimistic".
    sentiment: Literal["hopeful", "pessimistic"]


# Instruction sent as the system prompt alongside every chunk.
SENTIMENT_SYSTEM_PROMPT = (
    "What is the sentiment of the following text? Is it hopeful or pessimistic?"
)

model = llm.get_model("gpt-4o-mini")

# Collect the labels in chunk order so the trend across the diary entries can
# be analysed in later cells instead of only being read off the printed output.
sentiments = []
for i, chunk in enumerate(chunks):
    response = model.prompt(
        chunk,
        system=SENTIMENT_SYSTEM_PROMPT,
        schema=Sentiment,
    )
    # The response text is parsed as JSON and read via its "sentiment" key.
    sentiment = json.loads(response.text())
    sentiments.append(sentiment["sentiment"])
    print(f"Sentiment for chunk {i}: {sentiment['sentiment']}")

Sentiment for chunk 0: hopeful
Sentiment for chunk 1: hopeful
Sentiment for chunk 2: hopeful
Sentiment for chunk 3: hopeful
Sentiment for chunk 4: hopeful
Sentiment for chunk 5: pessimistic
Sentiment for chunk 6: hopeful
Sentiment for chunk 7: hopeful
Sentiment for chunk 8: hopeful
Sentiment for chunk 9: hopeful
Sentiment for chunk 10: hopeful
Sentiment for chunk 11: hopeful
Sentiment for chunk 12: hopeful
Sentiment for chunk 13: hopeful
Sentiment for chunk 14: pessimistic
Sentiment for chunk 15: hopeful


In [20]:
# Inspect the chunk at index 5, which the recorded run labelled "pessimistic".
flagged_index = 5
print(chunks[flagged_index])

06_SDA_07102023.m4a [Week 4A] [00:00:00 - 00:02:34]
Speaker 1: So this weekend I lost a lot of time because I had to leave
my house. It was being painted over. I had to turn off the Wi-Fi. So I
lost a whole day of work I could have done. let’s see, that was actually
one of my goals this week, which was to start a new portfolio that I
promised I would make, as well as making a new resumé with a better
design. And also, I wanted to start my LinkedIn page. and obviously I
need to turn in all work by Friday as well for class work core
curriculum. There's also a lot of pressure right now to do well with
that specific project due to the fact that now there's like more people
looking over my shoulder [indistinct] they are the, the two new people
are like from Royal College of the Arts. So I’m gonna have to make
something that looks halfway decent, hopefully. And for things I'm
looking forward to. I'm looking forward to meeting up with my mentor
again. because now that we're starting off with 