In [1]:
import os

from nceiDatabaseConnector.nceiDatabasePackage.nceiDataManager import NCEIDataManager
from nceiDatabaseConnector.nceiDatabasePackage.nceiDatabaseManager import NCEIDatabaseManager

In [2]:
# Years of daily data that the cells below download, export and load.
years = [1949, 1950]

# Working directories. Each path keeps its trailing slash because later cells
# append file names to it directly.
stationsFilePath = "./data/stations/"
modifiedStationsFilePath = "./data/stations/modifiedStations/"
downloadCSVFilePath = "./data/NCEI/ghcn/daily/"
modifiedCSVFilePath = "./data/NCEI/modified/daily/"

# Database connection settings.
dbname = "NCEIDatabase"
dbuser = "ESDP"
# Avoid committing a real secret: take the password from the DATABASE_PASSWORD
# environment variable when it is set, and fall back to the local development
# default otherwise.
dbpassword = os.environ.get("DATABASE_PASSWORD", "esdp1")


In [3]:
# Persist the database name, user and password to a .env file; the docker
# container uses that file during its PostgreSQL setup.
envVariables = {
    "DATABASE_NAME": dbname,
    "DATABASE_USER": dbuser,
    "DATABASE_PASSWORD": dbpassword,
}

# One KEY=value entry per line.
with open(".env", "w") as env_file:
    env_file.writelines(
        f"{name}={setting}\n" for name, setting in envVariables.items()
    )

print(".env file created successfully.")


.env file created successfully.


In [4]:
# Optional: start the docker container from inside the notebook by
# uncommenting one of the shell commands below (the first variant also passes
# --build). Both are left disabled here.
#!docker-compose up -d --build
#!docker-compose up -d

In [5]:
# Address of the PostgreSQL server.
host = "localhost"
port = "5432"

# Column names of the station table in the PostgreSQL database.
stationCols = [
    "id",
    "latitude",
    "longitude",
    "elevation",
    "state",
    "name",
    "gsn_flag",
    "hcn_crn_flag",
    "wmo_id",
]

# Column names of the per-year weather tables in the PostgreSQL database.
weatherCols = [
    "id",
    "stationcode",
    "datelabel",
    "param",
    "value",
    "mflag",
    "qflag",
    "sflag",
    "time",
]


In [6]:
# Helper that downloads and converts the NCEI files.
dataManager = NCEIDataManager()

# Database helper, configured with the connection settings defined above.
nceiDatabaseManager = NCEIDatabaseManager(
    db_name=dbname,
    db_user=dbuser,
    db_password=dbpassword,
    db_host=host,
    db_port=port,
)

Connected to database NCEIDatabase with user ESDP


In [7]:
# Download the station files (ghcnd-stations.txt and readme.txt) into stationsFilePath.
dataManager.download_stations(stationsFilePath)

Data downloaded. Will be saved as ghcnd-stations.txt in ./data/stations/
Data downloaded. Will be saved as readme.txt in ./data/stations/
Data downloaded and saved in ./data/stations/


In [10]:
# Convert the downloaded station files and save the result as
# modified_stations.csv under modifiedStationsFilePath.
dataManager.convert_stations(stationsFilePath, modifiedStationsFilePath)

Saved the modified stations file to ./data/stations/modifiedStations//modified_stations.csv


In [11]:
# Download one <year>.csv.gz archive for every entry in `years`
# (presumably stored under downloadCSVFilePath — confirm in NCEIDataManager).
dataManager.download_years(years, downloadCSVFilePath)

...Downloading data from year 1949....
Data downloaded. Will be saved as 1949.csv.gz
Data from year 1949 downloaded and saved.
...Downloading data from year 1950....
Data downloaded. Will be saved as 1950.csv.gz
Data from year 1950 downloaded and saved.


In [12]:
# Load each downloaded year and export it again; the load loop further down
# reads the result as modified_<year>.csv from modifiedCSVFilePath.
dataManager.export_downloaded_years(years, downloadCSVFilePath, modifiedCSVFilePath)

...Year 1949 processing...
Loading data from year 1949
Data from year 1949 loaded.
Export of year 1949 finished.
...Year 1950 processing...
Loading data from year 1950
Data from year 1950 loaded.
Export of year 1950 finished.


In [13]:
# Create the station table in the database.
nceiDatabaseManager.create_stations_table()

Station table created.


In [14]:
# Copy the converted station CSV into the "Station" table.
file_path = "{}modified_stations.csv".format(modifiedStationsFilePath)
nceiDatabaseManager.insert_copy(file_path, "Station", stationCols)

Copying file ./data/stations/modifiedStations/modified_stations.csv to database Station
Insert with copy of file ./data/stations/modifiedStations/modified_stations.csv to table: Station done.
Row count after insertion in table Station: 127994


In [8]:
# Drop the per-year climate tables before they are recreated and loaded.
# The table names are derived from `years` rather than hardcoded, so this cell
# stays in sync with the configured years and with the Climate<year> naming
# used by the load loop.
for year in years:
    nceiDatabaseManager.drop_table(f"Climate{year}")

Table Climate1949 dropped successfully.
Table Climate1950 dropped successfully.


In [9]:
# For every configured year: create its Climate<year> table, then load the
# exported CSV for that year into it with a four-thread insert.
for year in years:
    tableName = "Climate{}".format(year)
    filePath = "{}modified_{}.csv".format(modifiedCSVFilePath, year)
    nceiDatabaseManager.create_climate_table(year=year)
    nceiDatabaseManager.multi_threaded_insert(
        file_path=filePath,
        table_name=tableName,
        columns=weatherCols,
        num_threads=4,
    )


Table Climate1949 created successfully.
Total Number of lines: 18089253
Chunk size is 4522314.0
Splitting ./data/NCEI/modified/daily/modified_1949.csv into 4 chunks of size 4522314.0.
Writing chunk 0 file to ./data/NCEI/modified/daily/modified_1949_chunk0.csv
Writing chunk 1 file to ./data/NCEI/modified/daily/modified_1949_chunk1.csv
Writing chunk 2 file to ./data/NCEI/modified/daily/modified_1949_chunk2.csv
Thread started for chunk: ./data/NCEI/modified/daily/modified_1949_chunk0.csv
Thread started for chunk: ./data/NCEI/modified/daily/modified_1949_chunk1.csv
Thread started for chunk: ./data/NCEI/modified/daily/modified_1949_chunk2.csv
Thread started for chunk: ./data/NCEI/modified/daily/modified_1949_chunk3.csv
Thread for chunk ./data/NCEI/modified/daily/modified_1949_chunk3.csv completed.
Thread for chunk ./data/NCEI/modified/daily/modified_1949_chunk1.csv completed.
Thread for chunk ./data/NCEI/modified/daily/modified_1949_chunk0.csv completed.
Thread for chunk ./data/NCEI/modifie