In [15]:
import streamlit as st
import pandas as pd
import numpy as np
import openai
import os
import sys
import json

# Data Import

In [16]:
# Read the "airline_financials" sheet (primary financial data and metrics) from the workbook
airline_financials = pd.read_excel(
    "airline_financial_data.xlsx",
    sheet_name="airline_financials",
)

In [17]:
# Distinct values of the "Airline" column, in sorted order; echoed below as a sanity check
airlines = sorted(pd.unique(airline_financials["Airline"]))
airlines

['AAL', 'DAL', 'LUV', 'UAL']

In [18]:
# Distinct values of the "Year" column, in ascending order; echoed below as a sanity check
years = sorted(pd.unique(airline_financials["Year"]))
years

[2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024, 2025]

In [19]:
# Distinct reporting periods: "FY" is kept as-is, every other "Quarter" value gets a "Q" prefix
periods = sorted(
    airline_financials["Quarter"]
    .apply(lambda quarter: quarter if quarter == "FY" else f"Q{quarter}")
    .unique()
)
periods

['FY', 'Q1', 'Q2', 'Q3', 'Q4']

# Build Summary Dictionary

In [None]:
# Read the key from the environment so the secret is never saved inside the notebook.
openai.api_key = os.environ.get("OPENAI_API_KEY")
if not openai.api_key:
    # Fail here with a clear message rather than later, inside the first API call.
    raise RuntimeError("Set the OPENAI_API_KEY environment variable before running this notebook.")

In [21]:
# Define function to prompt the ChatGPT API and return a summary from training data
# Define the API call and prompt
def get_sec_filings_summary(airline, year, period, model="gpt-4o-mini", temperature=0.3):
    """Ask the OpenAI chat API for a summary of an airline's SEC filings for one period.

    No filing text is sent to the model: the prompt only names the airline and
    the period, so the answer comes from whatever the model already knows.
    Treat any names and figures in the result as unverified.

    Args:
        airline: Airline identifier interpolated into the prompt (e.g. "DAL").
        year: Year interpolated into the prompt.
        period: Period label appended directly to the year (e.g. "Q1" or "FY",
            giving "2022Q1" in the prompt).
        model: Chat model name passed to the API.
        temperature: Sampling temperature passed to the API.

    Returns:
        The message content of the first choice in the API response.
    """
    prompt = f"""
    Analyze all SEC filings for {airline} from {year}{period}, including all 10-Q, 10-K, 8-K filings, annual reports, and other filings.  
    Provide the top insights for the year and period specified. Focus on the data from {year}{period} and ignore discussion of previous periods unless it provides meaningful context for current results. Provide up to 10 insights. Insights should be related to key developments in the following areas: financial, operational, commercial strategy, labor, executive personnel, and route network. Do NOT include a topic if there is no relevant data or if there is nothing meaningful to report.
    Do NOT under any circumstances fabricate names, dates, or numerical figures. Ensure the values are present in the underlying data. A fabrication is content not present in the SEC filings including but not limited to any mention of 'John Doe' or 'Jane Doe'.
    Be sure to highlight any major events and their impacts and provide additional context. 
    Format the response in a structured list format grouped by topic. Present insights in chronological order as best as possible. Length of each item should fully detail the insight while being easy to read and digest. Include relevant names when discussing personnel matters. Include accurate figures when discussing financial or other metrics.
    End the response with a single paragraph "Wrap Up".
    """
    # The system message sets the persona and tone; the user message carries the request.
    response = openai.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are an expert financial analyst summarizing SEC filings and presenting them for public consumption. Accuracy is paramount, but you should provide interesting and revelatory insights. Language and style should be a cross between an investment analyst report and business media reporting."},
            {"role": "user", "content": prompt}
        ],
        temperature=temperature
    )
    return response.choices[0].message.content

In [29]:
def _load_summary_cache(path):
    """Return the summaries stored at ``path``, or ``{}`` when there is nothing usable."""
    if not os.path.exists(path) or os.stat(path).st_size == 0:
        return {}

    try:
        with open(path, "r") as f:
            cached = json.load(f)
    except json.JSONDecodeError:
        cached = None

    if isinstance(cached, dict):
        return cached

    # Invalid JSON, or a root that is not an object (e.g. a list): keep the old
    # file for manual recovery instead of silently overwriting it.
    backup = f"{path}.corrupt"
    os.replace(path, backup)
    print(f"Warning: {path} did not contain a JSON object; moved it to {backup} and starting fresh.")
    return {}


def _save_summary_cache(summaries, path):
    """Write ``summaries`` to ``path`` via a temp file so an interrupted write cannot truncate it."""
    tmp_path = f"{path}.tmp"
    with open(tmp_path, "w") as f:
        json.dump(summaries, f, indent=2)
    # The temp file sits next to the target, so this swaps the finished file into place.
    os.replace(tmp_path, path)


def insights_pipeline(airlines, years, periods, output_file="non_SEC_summaries.json"):
    """Generate and cache a summary for every airline / year / period combination.

    Summaries already present in ``output_file`` are skipped, so an interrupted
    run can be resumed without repeating API calls. The file is saved after each
    new summary.

    Args:
        airlines: Iterable of airline identifiers (top-level keys of the result).
        years: Iterable of years; each is stored under ``str(year)``.
        periods: Iterable of period labels (innermost keys of the result).
        output_file: Path of the JSON cache. It is created holding ``{}`` if it
            is missing or empty. If it exists but does not hold a JSON object,
            it is moved to ``<output_file>.corrupt`` and a fresh cache is started.

    Returns:
        Nested dict ``{airline: {str(year): {period: summary}}}`` containing both
        the previously cached and the newly generated summaries.
    """
    non_SEC_summaries = _load_summary_cache(output_file)
    if not non_SEC_summaries:
        # Guarantee the cache file exists and holds valid JSON (an empty object).
        _save_summary_cache(non_SEC_summaries, output_file)

    for airline in airlines:
        airline_summaries = non_SEC_summaries.setdefault(airline, {})

        for year in years:
            # JSON object keys are strings, so years are always stored as str.
            year_summaries = airline_summaries.setdefault(str(year), {})

            for period in periods:
                if period in year_summaries:
                    print(f"Skipping {airline} {year}{period} (already summarized).")
                    continue  # Skip if already summarized

                print(f"Summarizing the {year}{period} filings for {airline}...")

                summary = get_sec_filings_summary(airline, year, period)

                # Save into hierarchy
                year_summaries[period] = summary

                # Persist after each summary so a later failure loses at most one call
                _save_summary_cache(non_SEC_summaries, output_file)

                print(f"Summary saved for the {year}{period} filings for {airline}.")

    return non_SEC_summaries

In [34]:
# Pipeline inputs: two carriers, every entry of `years` except the last, and all periods
airline, year, period = ["DAL", "LUV"], years[:-1], periods

In [35]:
# Run the pipeline over the selection made above; progress is printed as it goes
non_SEC_summaries = insights_pipeline(airlines=airline, years=year, periods=period)

Summarizing the 2014FY filings for DAL...
Summary saved for the 2014FY filings for DAL.
Summarizing the 2014Q1 filings for DAL...
Summary saved for the 2014Q1 filings for DAL.
Summarizing the 2014Q2 filings for DAL...
Summary saved for the 2014Q2 filings for DAL.
Summarizing the 2014Q3 filings for DAL...
Summary saved for the 2014Q3 filings for DAL.
Summarizing the 2014Q4 filings for DAL...
Summary saved for the 2014Q4 filings for DAL.
Summarizing the 2015FY filings for DAL...
Summary saved for the 2015FY filings for DAL.
Summarizing the 2015Q1 filings for DAL...
Summary saved for the 2015Q1 filings for DAL.
Summarizing the 2015Q2 filings for DAL...
Summary saved for the 2015Q2 filings for DAL.
Summarizing the 2015Q3 filings for DAL...
Summary saved for the 2015Q3 filings for DAL.
Summarizing the 2015Q4 filings for DAL...
Summary saved for the 2015Q4 filings for DAL.
Summarizing the 2016FY filings for DAL...
Summary saved for the 2016FY filings for DAL.
Summarizing the 2016Q1 filings f

In [31]:
# Inspect the nested summaries dict (airline -> year -> period -> summary text)
non_SEC_summaries

{'DAL': {'2022': {'Q1': "### Insights from Delta Air Lines (DAL) SEC Filings for 2022 Q1\n\n#### Financial Insights\n1. **Revenue Recovery**: In the Q1 2022 10-Q filing, Delta reported total operating revenue of $9.35 billion, a significant increase compared to $4.15 billion in Q1 2021. This surge reflects a robust recovery in travel demand as pandemic-related restrictions eased, with passenger revenue reaching $7.1 billion, up from $2.2 billion year-over-year.\n\n2. **Net Loss Reduction**: Delta reported a net loss of $1.2 billion for Q1 2022, a notable improvement from a net loss of $1.6 billion in Q1 2021. The narrowing of losses indicates effective cost management and a rebound in operational performance as travel demand picked up.\n\n3. **Operating Expenses**: Operating expenses for Q1 2022 totaled $10.6 billion, a 10% increase from the previous year. The rise in costs was primarily driven by higher fuel prices, which averaged $3.24 per gallon, up from $1.82 per gallon in Q1 2021.