# **Main Notebook for Thesis Project**

Portfolio Stocks:
1. AAPL (works)
2. NVDA (works)
3. TSLA
4. AMD
5. ADBE (works)

In [1]:
import re
import requests
import pandas as pd
from bs4 import BeautifulSoup
# local project module
from data import Data

In [2]:
# Header sent with the request; the User-Agent carries a contact e-mail address
AGENT = {'User-Agent': 'mehmet.ozturk@kcl.ac.uk'}
# Download the ticker -> CIK listing as JSON.
# NOTE: `ciks` holds the raw HTTP response here; the next cell rebinds it to a DataFrame.
ciks = requests.get(
    'https://www.sec.gov/files/company_tickers.json',
    headers = AGENT,
    timeout = 30  # without a timeout a stalled connection blocks the kernel indefinitely
)
# success check: report the HTTP status either way
if ciks.status_code == 200:
    print(f'Status: [{ciks.status_code}], CIK data obtained successfully.')
else:
    print(f'Status: [{ciks.status_code}], error obtaining CIK.')

Status: [200], CIK data obtained successfully.


In [3]:
# Build a DataFrame from the JSON payload (one row per top-level key)
# and cast every column to string in the same expression.
# NOTE(review): this rebinds `ciks` from the HTTP response to a DataFrame,
# so the cell cannot be re-run without first re-running the download cell.
ciks = pd.DataFrame.from_dict(ciks.json(), orient = 'index').astype(str)
ciks.head()

Unnamed: 0,cik_str,ticker,title
0,320193,AAPL,Apple Inc.
1,789019,MSFT,MICROSOFT CORP
2,1045810,NVDA,NVIDIA CORP
3,1652044,GOOGL,Alphabet Inc.
4,1018724,AMZN,AMAZON COM INC


In [4]:
# The object is created for ticker 'NVDA', so it is named after that ticker.
NVDA = Data('NVDA')
NVDA.get_cik(ciks)        # receives the CIK table built from the SEC ticker listing
NVDA.get_metadata()
NVDA.process_filings()
# Backward-compatible alias: other cells still refer to this object as `AAPL`.
# It holds NVDA data, not Apple data.
AAPL = NVDA


Respone: [200], metadata obtained successfully.
Respone: [200], filing obtained successfully.
Current filename:	 nvda2018q110q.htm
Respone: [200], filing obtained successfully.
Current filename:	 nvda2018q210q.htm
Respone: [200], filing obtained successfully.
Current filename:	 nvda2018q310q.htm
Respone: [200], filing obtained successfully.
Current filename:	 nvda-2018x10k.htm
Respone: [200], filing obtained successfully.
Current filename:	 nvda2019q110q.htm
Respone: [200], filing obtained successfully.
Current filename:	 nvda2019q210q.htm
Respone: [200], filing obtained successfully.
Current filename:	 nvda2019q310q.htm
Respone: [200], filing obtained successfully.
Current filename:	 nvda-2019x10k.htm
Respone: [200], filing obtained successfully.
Current filename:	 nvda2020q110q.htm
Respone: [200], filing obtained successfully.
Current filename:	 nvda2020q210q.htm
Respone: [200], filing obtained successfully.
Current filename:	 nvda2020q310q.htm
Respone: [200], filing obtained success

In [5]:
# NOTE(review): `AAPL` refers to the Data object created with ticker 'NVDA',
# so this displays NVDA figures.
AAPL.data

{'2017-05-23': {'revenue': 1937,
  'gross profit': 1150,
  'operating income': 554,
  'net income': 507,
  'eps': 0.86,
  'current assets': 8116,
  'current liabilities': 983,
  'shareholder equity': 6132,
  'net cash operating': 282,
  'net cash investing': 754,
  'net cash financing': -813,
  'cash': 1989},
 '2017-08-23': {'revenue': 2230,
  'gross profit': 1302,
  'operating income': 688,
  'net income': 583,
  'eps': 0.98,
  'current assets': 8070,
  'current liabilities': 1032,
  'shareholder equity': 5973,
  'net cash operating': 705,
  'net cash investing': 262,
  'net cash financing': -968,
  'cash': 1988},
 '2017-11-21': {'revenue': 2636,
  'gross profit': 1569,
  'operating income': 895,
  'net income': 838,
  'eps': 1.39,
  'current assets': 8479,
  'current liabilities': 1027,
  'shareholder equity': 6352,
  'net cash operating': 1157,
  'net cash investing': 286,
  'net cash financing': -629,
  'cash': 2802},
 '2018-02-28': {'revenue': 2911,
  'gross profit': 1801,
  'oper

In [9]:
# Convert master dictionary to DataFrame (one row per top-level key) and
# name the index 'date' in the same expression.
# NOTE(review): `AAPL` refers to the Data object created with ticker 'NVDA'.
# NOTE(review): the previously saved output of this cell matched ADBE.data and
# the execution count was out of order; re-run after Restart Kernel -> Run All.
df = pd.DataFrame.from_dict(AAPL.data, orient='index').rename_axis('date')

# Last expression gives the rich notebook display instead of print()'s plain text
df

            current assets  current liabilities  shareholder equity  revenue  \
date                                                                           
2018-03-28            7481                 3536                8634     2079   
2018-06-27            7741                 3729                8706     2195   
2018-09-26            6301                 3792                8862     2291   
2019-01-25            4857                 4301                9362     2465   
2019-03-27            5134                 5314                9871     2601   
2019-06-26            5343                 7687                9932     2744   
2019-09-26            5750                 7803               10243     2834   
2020-01-21            6495                 8191               10530     2992   
2020-03-25            6455                 5228               10465     3091   
2020-06-24            6649                 5164               10881     3128   
2020-09-23            7387              

In [7]:
# Build the Data object for ADBE and run its steps in order.
# NOTE(review): same four-call sequence as the cell that creates the 'NVDA'
# object; candidate for a small helper taking the ticker as a parameter.
ADBE = Data('ADBE')
ADBE.get_cik(ciks)  # receives the CIK table built from the SEC ticker listing
ADBE.get_metadata()
ADBE.process_filings()

Respone: [200], metadata obtained successfully.
Respone: [200], filing obtained successfully.
Current filename:	 adbe10qq118.htm
Respone: [200], filing obtained successfully.
Current filename:	 adbe10qq218.htm
Respone: [200], filing obtained successfully.
Current filename:	 adbe10qq318.htm
Respone: [200], filing obtained successfully.
Current filename:	 adbe10kfy18.htm
Respone: [200], filing obtained successfully.
Current filename:	 adbe10qq119.htm
Respone: [200], filing obtained successfully.
Current filename:	 adbe10qq219.htm
Respone: [200], filing obtained successfully.
Current filename:	 adbe10qq319.htm
Respone: [200], filing obtained successfully.
Current filename:	 adbe10kfy19.htm
Respone: [200], filing obtained successfully.
Current filename:	 adbe10qq120.htm
Respone: [200], filing obtained successfully.
Current filename:	 adbe10qq220.htm
Respone: [200], filing obtained successfully.
Current filename:	 adbe-20200828.htm
Respone: [200], filing obtained successfully.
Current filen

In [8]:
# Inspect the `data` attribute collected for ADBE
# (per the output below: a dict keyed by date string).
ADBE.data

{'2018-03-28': {'current assets': 7481,
  'current liabilities': 3536,
  'shareholder equity': 8634,
  'revenue': 2079,
  'gross profit': 1820,
  'operating income': 703,
  'net income': 583,
  'eps': 1.18,
  'net cash operating': 990,
  'net cash investing': -94,
  'net cash financing': -541,
  'cash': 2667},
 '2018-06-27': {'current assets': 7741,
  'current liabilities': 3729,
  'shareholder equity': 8706,
  'revenue': 2195,
  'gross profit': 1914,
  'operating income': 698,
  'net income': 663,
  'eps': 1.35,
  'net cash operating': 976,
  'net cash investing': 68,
  'net cash financing': -718,
  'cash': 2988},
 '2018-09-26': {'current assets': 6301,
  'current liabilities': 3792,
  'shareholder equity': 8862,
  'revenue': 2291,
  'gross profit': 1996,
  'operating income': 719,
  'net income': 666,
  'eps': 1.36,
  'net cash operating': 955,
  'net cash investing': -1520,
  'net cash financing': -671,
  'cash': 1747},
 '2019-01-25': {'current assets': 4857,
  'current liabilities'