# Symbol list experiments: merging updates, saving to CSV/SQLite/Parquet, and fetching EOD / yfinance data

In [2]:
import os
import datetime
import string
import pandas as pd
import sqlite3 as db
from sqlalchemy import create_engine
import yfinance as yf

# Base name (no extension) shared by the .csv, .sqlite and .parquet files this notebook writes.
file_storage = "test2"

In [2]:
def _symbol_records(rows):
    """Expand compact ``(symbol, name, listed, last)`` tuples into symbol dicts.

    ``listed`` and ``last`` are ``(year, month, day)`` triples; each one is
    stored as the ISO-8601 string of the matching midnight ``datetime``.
    Every record is given the status ``"Active"``.
    """
    return [
        {
            "Symbol": symbol,
            "Name": name,
            "ListedDt": datetime.datetime(*listed).isoformat(),
            "LastDt": datetime.datetime(*last).isoformat(),
            "Status": "Active",
        }
        for symbol, name, listed, last in rows
    ]


# Baseline symbol table.
symbols_data = _symbol_records([
    ("A", "Agilent Technologies Inc", (2005, 1, 3), (2022, 9, 6)),
    ("AA", "Alcoa Corporation", (2016, 10, 18), (2022, 9, 6)),
    ("ZGNX", "Zogenix", (2010, 11, 23), (2022, 3, 4)),
])

# Incoming changes: AA and ZGNX also appear in the baseline (with different
# names / listing dates here), while BC appears only in this list.
update_data = _symbol_records([
    ("AA", "Updated Alcoa Corporation", (2022, 10, 18), (2022, 9, 6)),
    ("ZGNX", "Updated Zogenix", (2012, 11, 23), (2022, 3, 4)),
    ("BC", "Basic Company", (2021, 1, 1), (2022, 3, 4)),
])

In [3]:
# Wrap both record lists in DataFrames: `source` is the current table,
# `new_data` holds the incoming rows that will be merged into it.
source = pd.DataFrame(data=symbols_data)
new_data = pd.DataFrame(data=update_data)

In [4]:
# Column lists used to slice the merged frame: `cols_old` are the plain
# column names of `source`, `cols_new` the names carrying the incoming values.
suffixe_new = '_new'
cols_old = source.columns
# The merge key ("Symbol") is shared by both sides of the merge and never gets
# a suffix. Pick it out by name rather than by position, so the mapping stays
# correct even when "Symbol" is not the first column of `source`.
cols_new = [col if col == 'Symbol' else f"{col}{suffixe_new}" for col in cols_old]

In [5]:
cols_old

Index(['Symbol', 'Name', 'ListedDt', 'LastDt', 'Status'], dtype='object')

In [6]:
cols_new

['Symbol', 'Name_new', 'ListedDt_new', 'LastDt_new', 'Status_new']

In [7]:
# Outer-join the current table with the incoming rows on "Symbol".
# indicator=True adds a `_merge` column (left_only / both / right_only) that
# the following cells use to split the rows. Columns from `source` keep their
# names; clashing columns from `new_data` receive the suffix.
# The suffix comes from `suffixe_new` (instead of a second hard-coded '_new')
# so the merged column names always agree with `cols_new`.
merged = pd.merge(
    source,
    new_data,
    on='Symbol',
    how='outer',
    indicator=True,
    suffixes=('', suffixe_new),
)
merged

Unnamed: 0,Symbol,Name,ListedDt,LastDt,Status,Name_new,ListedDt_new,LastDt_new,Status_new,_merge
0,A,Agilent Technologies Inc,2005-01-03T00:00:00,2022-09-06T00:00:00,Active,,,,,left_only
1,AA,Alcoa Corporation,2016-10-18T00:00:00,2022-09-06T00:00:00,Active,Updated Alcoa Corporation,2022-10-18T00:00:00,2022-09-06T00:00:00,Active,both
2,ZGNX,Zogenix,2010-11-23T00:00:00,2022-03-04T00:00:00,Active,Updated Zogenix,2012-11-23T00:00:00,2022-03-04T00:00:00,Active,both
3,BC,,,,,Basic Company,2021-01-01T00:00:00,2022-03-04T00:00:00,Active,right_only


In [8]:
# Rows found only in `source`: keep their original (unsuffixed) columns.
is_left_only = merged["_merge"] == "left_only"
old = merged.loc[is_left_only, cols_old]
old

Unnamed: 0,Symbol,Name,ListedDt,LastDt,Status
0,A,Agilent Technologies Inc,2005-01-03T00:00:00,2022-09-06T00:00:00,Active


In [9]:
# Rows found on both sides: take the suffixed (incoming) columns and give
# them back the plain column names.
is_in_both = merged["_merge"] == "both"
updates = merged.loc[is_in_both, cols_new]
updates.columns = cols_old
updates

Unnamed: 0,Symbol,Name,ListedDt,LastDt,Status
1,AA,Updated Alcoa Corporation,2022-10-18T00:00:00,2022-09-06T00:00:00,Active
2,ZGNX,Updated Zogenix,2012-11-23T00:00:00,2022-03-04T00:00:00,Active


In [10]:
# Rows found only in `new_data`: their values live in the suffixed columns,
# which are relabelled with the plain column names.
is_right_only = merged["_merge"] == "right_only"
new_sym = merged.loc[is_right_only, cols_new]
new_sym.columns = cols_old
new_sym

Unnamed: 0,Symbol,Name,ListedDt,LastDt,Status
3,BC,Basic Company,2021-01-01T00:00:00,2022-03-04T00:00:00,Active


In [11]:
# Stack the three row groups (untouched, updated, newly added) into one
# table with a fresh 0..n-1 index.
row_groups = [old, updates, new_sym]
new_save = pd.concat(row_groups, ignore_index=True)
new_save

Unnamed: 0,Symbol,Name,ListedDt,LastDt,Status
0,A,Agilent Technologies Inc,2005-01-03T00:00:00,2022-09-06T00:00:00,Active
1,AA,Updated Alcoa Corporation,2022-10-18T00:00:00,2022-09-06T00:00:00,Active
2,ZGNX,Updated Zogenix,2012-11-23T00:00:00,2022-03-04T00:00:00,Active
3,BC,Basic Company,2021-01-01T00:00:00,2022-03-04T00:00:00,Active


In [12]:
# Write the combined table to "<file_storage>.csv" without the index column.
csv_path = f'{file_storage}.csv'
new_save.to_csv(csv_path, index=False)

In [13]:
# Store the combined table in the SQLite file "<file_storage>.sqlite" as
# table 'Symbols', replacing that table if it already exists.
sqlite_url = f"sqlite:///{file_storage}.sqlite"
engine = create_engine(sqlite_url)
new_save.to_sql(name='Symbols', con=engine, if_exists='replace', index=False)

4

In [14]:
# Parquet round-trip: write the combined table to "<file_storage>.parquet",
# read it straight back, and display what was loaded.
fn = file_storage + '.parquet'
new_save.to_parquet(fn, index=False)
parq = pd.read_parquet(fn)
parq

Unnamed: 0,Symbol,Name,ListedDt,LastDt,Status
0,A,Agilent Technologies Inc,2005-01-03T00:00:00,2022-09-06T00:00:00,Active
1,AA,Updated Alcoa Corporation,2022-10-18T00:00:00,2022-09-06T00:00:00,Active
2,ZGNX,Updated Zogenix,2012-11-23T00:00:00,2022-03-04T00:00:00,Active
3,BC,Basic Company,2021-01-01T00:00:00,2022-03-04T00:00:00,Active


In [12]:
# Parse every HTML table on the eoddata NASDAQ "A" listing page; the table at
# index 4 is the one displayed below.
eoddata_nasdaq_a_url = 'https://eoddata.com/stocklist/NASDAQ/A.htm'
data = pd.read_html(eoddata_nasdaq_a_url)
data[4]

Unnamed: 0,Code,Name,High,Low,Close,Volume,Change,Change.1,Change.2,Unnamed: 9
0,AACG,Ata Creativity Global ADR,1.9500,1.8100,1.8100,12200,-0.0400,,2.16,
1,AACI,Armada Acquisition Corp I,9.9200,9.9200,9.9200,1900,0.0200,,0.20,
2,AACIU,Armada Acquisition Corp I,9.9600,9.9600,9.9600,300,0.0500,,0.50,
3,AACIW,Armada Acquisition Corp I WT,0.1797,0.1797,0.1797,300,0.0575,,47.05,
4,AADI,Aadi Biosciences Inc,12.8100,12.1400,12.5000,935300,-0.2600,,2.04,
...,...,...,...,...,...,...,...,...,...,...
568,AZ,A2Z Smart Technologies Corp,3.0000,2.7900,2.9900,8500,-0.1200,,3.86,
569,AZN,Astrazeneca Plc ADR,58.7600,57.5800,58.0500,7018000,-0.0900,,0.15,
570,AZPN,Aspen Technology,227.3000,217.4000,226.4000,591400,3.3000,,1.46,
571,AZTA,Azenta Inc,50.4700,48.5100,50.3400,1758600,-0.8100,,1.58,


In [14]:
# Stack the scraped table on top of itself so every row appears twice,
# giving the de-duplication step something to remove.
# NOTE(review): this rebinds `new_data`, which earlier held the update frame
# passed to the merge; re-running the merge cell after this one would use
# this frame instead.
new_data = pd.concat([data[4]] * 2)
new_data.sort_values(by='Code')

Unnamed: 0,Code,Name,High,Low,Close,Volume,Change,Change.1,Change.2,Unnamed: 9
0,AACG,Ata Creativity Global ADR,1.95,1.81,1.81,12200,-0.04,,2.16,
0,AACG,Ata Creativity Global ADR,1.95,1.81,1.81,12200,-0.04,,2.16,
1,AACI,Armada Acquisition Corp I,9.92,9.92,9.92,1900,0.02,,0.20,
1,AACI,Armada Acquisition Corp I,9.92,9.92,9.92,1900,0.02,,0.20,
2,AACIU,Armada Acquisition Corp I,9.96,9.96,9.96,300,0.05,,0.50,
...,...,...,...,...,...,...,...,...,...,...
570,AZPN,Aspen Technology,227.30,217.40,226.40,591400,3.30,,1.46,
571,AZTA,Azenta Inc,50.47,48.51,50.34,1758600,-0.81,,1.58,
571,AZTA,Azenta Inc,50.47,48.51,50.34,1758600,-0.81,,1.58,
572,AZYO,Aziyo Biologics Inc Cl A,7.13,6.50,6.50,9200,-0.75,,10.34,


In [17]:
# Keep only the first row for each 'Code' and return a new frame
# (`new_data` itself is left unchanged).
no_duplicates = new_data.drop_duplicates(subset=['Code'])
no_duplicates.sort_values(by='Code')

Unnamed: 0,Code,Name,High,Low,Close,Volume,Change,Change.1,Change.2,Unnamed: 9
0,AACG,Ata Creativity Global ADR,1.9500,1.8100,1.8100,12200,-0.0400,,2.16,
1,AACI,Armada Acquisition Corp I,9.9200,9.9200,9.9200,1900,0.0200,,0.20,
2,AACIU,Armada Acquisition Corp I,9.9600,9.9600,9.9600,300,0.0500,,0.50,
3,AACIW,Armada Acquisition Corp I WT,0.1797,0.1797,0.1797,300,0.0575,,47.05,
4,AADI,Aadi Biosciences Inc,12.8100,12.1400,12.5000,935300,-0.2600,,2.04,
...,...,...,...,...,...,...,...,...,...,...
568,AZ,A2Z Smart Technologies Corp,3.0000,2.7900,2.9900,8500,-0.1200,,3.86,
569,AZN,Astrazeneca Plc ADR,58.7600,57.5800,58.0500,7018000,-0.0900,,0.15,
570,AZPN,Aspen Technology,227.3000,217.4000,226.4000,591400,3.3000,,1.46,
571,AZTA,Azenta Inc,50.4700,48.5100,50.3400,1758600,-0.8100,,1.58,


In [10]:
list(string.ascii_uppercase)

['A',
 'B',
 'C',
 'D',
 'E',
 'F',
 'G',
 'H',
 'I',
 'J',
 'K',
 'L',
 'M',
 'N',
 'O',
 'P',
 'Q',
 'R',
 'S',
 'T',
 'U',
 'V',
 'W',
 'X',
 'Y',
 'Z']

In [22]:
# Upper-case ASCII letters followed by the ten decimal digits, one character
# per list element.
letters = [*string.ascii_uppercase, *string.digits]
letters

['A',
 'B',
 'C',
 'D',
 'E',
 'F',
 'G',
 'H',
 'I',
 'J',
 'K',
 'L',
 'M',
 'N',
 'O',
 'P',
 'Q',
 'R',
 'S',
 'T',
 'U',
 'V',
 'W',
 'X',
 'Y',
 'Z',
 '0',
 '1',
 '2',
 '3',
 '4',
 '5',
 '6',
 '7',
 '8',
 '9']

In [27]:
list(string.digits)


['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

In [48]:
import os
from os import walk
from pathlib import Path

# List the file names found directly inside `mypath`. Only the first entry
# yielded by walk() is used; if walk() yields nothing, the fallback tuple
# supplies an empty list.
mypath = '2022-08-18'
top_level = next(walk(mypath), (None, None, []))
filenames = top_level[2]
filenames

['AMEX.csv',
 'AMEX.parquet',
 'AMEX.sqlite',
 'ASX.csv',
 'ASX.parquet',
 'ASX.sqlite',
 'LSE.csv',
 'LSE.parquet',
 'LSE.sqlite',
 'NASDAQ.csv',
 'NASDAQ.parquet',
 'NASDAQ.sqlite',
 'NYSE.csv',
 'NYSE.parquet',
 'NYSE.sqlite',
 'SGX.csv',
 'SGX.parquet',
 'SGX.sqlite',
 'TSX.csv',
 'TSX.parquet',
 'TSX.sqlite',
 'TSXV.csv',
 'TSXV.parquet',
 'TSXV.sqlite']

In [51]:
# Rename every listed file in `mypath` to "eoddata_<lower-cased stem><suffix>",
# e.g. "AMEX.csv" -> "eoddata_amex.csv".
for file_n in filenames:
    fn = Path(file_n)
    stem = fn.stem.lower()
    # Do not stack the prefix on a name that already carries it; otherwise a
    # second run over a fresh listing would produce "eoddata_eoddata_...".
    if stem.startswith("eoddata_"):
        new_name = f"{stem}{fn.suffix}"
    else:
        new_name = f"eoddata_{stem}{fn.suffix}"
    if new_name == file_n:
        # Already in its final form: nothing to rename.
        continue
    print(f"Old name: {file_n}, New name: {new_name}")
    os.rename(os.path.join(mypath, file_n), os.path.join(mypath, new_name))


Old name: AMEX.csv, New name: eoddata_amex.csv
Old name: AMEX.parquet, New name: eoddata_amex.parquet
Old name: AMEX.sqlite, New name: eoddata_amex.sqlite
Old name: ASX.csv, New name: eoddata_asx.csv
Old name: ASX.parquet, New name: eoddata_asx.parquet
Old name: ASX.sqlite, New name: eoddata_asx.sqlite
Old name: LSE.csv, New name: eoddata_lse.csv
Old name: LSE.parquet, New name: eoddata_lse.parquet
Old name: LSE.sqlite, New name: eoddata_lse.sqlite
Old name: NASDAQ.csv, New name: eoddata_nasdaq.csv
Old name: NASDAQ.parquet, New name: eoddata_nasdaq.parquet
Old name: NASDAQ.sqlite, New name: eoddata_nasdaq.sqlite
Old name: NYSE.csv, New name: eoddata_nyse.csv
Old name: NYSE.parquet, New name: eoddata_nyse.parquet
Old name: NYSE.sqlite, New name: eoddata_nyse.sqlite
Old name: SGX.csv, New name: eoddata_sgx.csv
Old name: SGX.parquet, New name: eoddata_sgx.parquet
Old name: SGX.sqlite, New name: eoddata_sgx.sqlite
Old name: TSX.csv, New name: eoddata_tsx.csv
Old name: TSX.parquet, New name

In [8]:
# Load the NASDAQ symbol table from the '2022-08-18-Copie' directory.
# os.path.join inserts the separator of the current OS; the previous
# hard-coded backslash only resolved on Windows.
nasdaq = pd.read_parquet(os.path.join('2022-08-18-Copie', 'NASDAQ.parquet'))
nasdaq

Unnamed: 0,Symbol,Name
0,AACG,Ata Creativity Global ADR
1,AACI,Armada Acquisition Corp I
2,AACIU,Armada Acquisition Corp I
3,AACIW,Armada Acquisition Corp I WT
4,AADI,Aadi Biosciences Inc
...,...,...
11440,AZ,A2Z Smart Technologies Corp
11441,AZN,Astrazeneca Plc ADR
11442,AZPN,Aspen Technology
11443,AZTA,Azenta Inc


In [10]:
import yfinance as yf
# Download price history for AAPL and MSFT between the two dates, with the
# result grouped by ticker.
data1 = yf.download(
    "AAPL MSFT",
    start="2017-01-01",
    end="2017-04-30",
    group_by='ticker',
)


[*********************100%***********************]  2 of 2 completed


In [11]:
# Ticker handle for MSFT; it is created once, outside the loop.
c_info = yf.Ticker("MSFT")
# Echo every symbol of the NASDAQ table, one per line. The loop does not
# query yfinance per ticker.
for ticker in nasdaq["Symbol"]:
    print(ticker)


AACG
AACI
AACIU
AACIW
AADI
AADR
AAL
AAME
AAOI
AAON
AAPB
AAPD
AAPL
AAPU
AATC
AAWW
AAXJ
ABCB
ABCL
ABCM
ABEO
ABGI
ABIO
ABMD
ABNB
ABOS
ABSI
ABST
ABTX
ABUS
ABVC
ACAB
ACABU
ACABW
ACAC
ACACU
ACACW
ACAD
ACAH
ACAHU
ACAHW
ACAX
ACAXR
ACAXU
ACAXW
ACB
ACBA
ACBAU
ACBAW
ACCD
ACER
ACET
ACEV
ACEVU
ACEVW
ACGL
ACGLN
ACGLO
ACHC
ACHL
ACHV
ACIU
ACIW
ACKIU
ACKIW
ACLS
ACLX
ACMR
ACNB
ACNT
ACON
ACONW
ACOR
ACQR
ACQRU
ACQRW
ACRS
ACRX
ACST
ACT
ACTG
ACVA
ACWI
ACWX
ACXP
ADAG
ADAL
ADALU
ADALW
ADAP
ADBE
ADER
ADERU
ADERW
ADES
ADGI
ADI
ADIL
ADILW
ADMA
ADMP
ADN
ADNWW
ADOC
ADOCR
ADOCW
ADP
ADPT
ADRE
ADSE
ADSEW
ADSK
ADTH
ADTHW
ADTN
ADTX
ADUS
ADV
ADVM
ADVWW
ADXN
AEAC
AEACU
AEACW
AEAE
AEAEU
AEAEW
AEHA
AEHAW
AEHL
AEHR
AEI
AEIS
AEMD
AEP
AEPPZ
AERC
AERI
AESE
AEY
AEYE
AEZS
AFAC
AFACU
AFACW
AFAQ
AFAQU
AFAQW
AFAR
AFARU
AFARW
AFBI
AFCG
AFIB
AFMD
AFRI
AFRIW
AFRM
AFYA
AGBA
AGBAR
AGBAW
AGEN
AGFS
AGFY
AGGR
AGGRU
AGGRW
AGIL
AGILW
AGIO
AGLE
AGMH
AGNC
AGNCL
AGNCM
AGNCN
AGNCO
AGNCP
AGNG
AGRI
AGRIW
AGRX
AGTC
AGYS
AGZD
AHCO
AHG
AHI
AHPA
AHPAU

In [7]:
# Read the 'sector' entry from the info mapping of the Ticker held in `c_info` (created for "MSFT").
c_info.info.get('sector')

'Technology'