## Data Collection & Storage

### Using the wunderground API to retrieve historical weather data for New York for the past 5 years (2012-2017).
### Using the quandl API to retrieve historical stock data of different sectors for the past 5 years (2012-2017).

#### Note - The API keys are stored in environment variables and retrieved through the os library

In [1]:
import os # For Directory Path
import requests # For Making Web Request
import json # For Json Data
import glob
import shutil # to move or copy files
import csv
from datetime import date
from dateutil.rrule import rrule, DAILY

#### Step 1 (Data Collection)
### Here I am collecting data for weather & stocks


#### WEATHER
- Collecting Data from API using (Request) in JSON form.
- Storing one JSON file per day in final/WeatherHistory for the last 5 years (2012-2017).



In [2]:
# Function to make sure the directory part of a path exists
def funCheckDir(path):
    """Create the directory portion of *path* if it does not exist yet.

    Only ``os.path.dirname(path)`` is created, so a directory path must end
    with a separator for its last component to be included.

    Args:
        path: A file path, or a directory path ending in a separator.
    """
    directory = os.path.dirname(path)
    # A bare file name has no directory component, and os.makedirs('')
    # would raise, so there is nothing to create in that case.
    if directory:
        # exist_ok makes the call safe to repeat and avoids the gap between
        # a separate existence check and the creation.
        os.makedirs(directory, exist_ok=True)

In [3]:
# Working directory of the notebook; every output folder is built below it.
relativePath = os.getcwd()

# Folder that receives the raw weather JSON files (created when missing).
resultPath = "//".join((relativePath, "final//WeatherHistory//"))
funCheckDir(resultPath)

# Weather API key, read from the 'weatherApi' environment variable.
apiKey = os.environ.get('weatherApi')

In [4]:
def get_weatherData(weatherDate):
    """Download one day of New York weather history and save it as JSON.

    The response is written to ``<resultPath>//<weatherDate>.json``.

    Args:
        weatherDate: Day to fetch, as a string (the caller passes
            ``YYYYMMDD``); it is also used as the output file name.

    Raises:
        requests.HTTPError: If the API answers with an HTTP error status.
    """
    urlstart = 'http://api.wunderground.com/api/'+apiKey+'/history_'
    urlend = '/q/NY/new-york.json'

    url = urlstart + str(weatherDate) + urlend
    # Without a timeout a single stalled request would hang the whole
    # multi-year download loop.
    response = requests.get(url, timeout=30)
    # Fail loudly instead of saving an error body as if it were weather data.
    response.raise_for_status()
    data = response.json()
    # Store the raw response in the weather history folder.
    with open(resultPath+"//"+weatherDate+'.json', 'w') as file:
        json.dump(data, file)
        
        
# Request the weather history once for every day produced by the daily rule.
startDate = date(2012, 1, 1)
endDate = date(2017, 4, 8)
for dt in rrule(freq=DAILY, dtstart=startDate, until=endDate):
    get_weatherData(format(dt, "%Y%m%d"))

#### STOCKS
- Collecting Data from API using (Request) in JSON form.
- Storing one JSON file per sector in final/stock for the last 5 years (2012-2017).

In [5]:
# Sectors being tracked and the ticker symbol used for each of them:
# Oil & Gas / Food / Beverage / Energy / Solar
sectors = {
    "oil & Gas": 'CVX',
    "Food": 'CMG',
    "Beverage": 'PEP',
    "Energy": 'NRG',
    "Solar": 'FSLR',
}

# Date window requested from the stock API.
sDate = '2012-01-01'
eDate = '2017-04-08'

# Stock API key, read from the 'stockApi' environment variable.
apiKeyStock = os.environ.get('stockApi')

# Folder that receives the raw stock JSON files (created when missing).
stockPath = "//".join((relativePath, "final//stock//"))
funCheckDir(stockPath)

# Function that downloads one ticker's price history from the stock API
def get_data(key, value):
    """Download the price history of one ticker and save it as JSON.

    The response is written to ``<stockPath>//<key>-<value>.json``.

    Args:
        key: Sector name; used as the first part of the output file name.
        value: Ticker symbol requested from the WIKI dataset.

    Raises:
        requests.HTTPError: If the API answers with an HTTP error status.
    """
    stockUrl = 'https://www.quandl.com/api/v3/datasets/WIKI/'+str(value)+'.json'
    # Let requests build and encode the query string. An unset API key
    # (None) is simply left out instead of being sent as the text "None".
    query = {
        'start_date': sDate,
        'end_date': eDate,
        'api_key': apiKeyStock,
    }
    # Without a timeout a stalled request would block the notebook forever.
    response = requests.get(stockUrl, params=query, timeout=30)
    # Fail loudly instead of saving an error body as if it were stock data.
    response.raise_for_status()
    stockData = response.json()
    with open(stockPath+"//"+key+'-'+value+'.json', 'w') as file:
        json.dump(stockData, file)
    

# Download and save the price history of every configured sector.
for sectorName, ticker in sectors.items():
    get_data(sectorName, ticker)

#### Step 2 (Data Storing)
- Iterating through all JSON files stored in the final/WeatherHistory folder.
- Extracting useful information such as precipitation and the max, mean & min temperature.
- Storing the result in final/WeatherData as a CSV file named weather.csv.

In [5]:
# Collect every raw weather JSON file. glob returns names in arbitrary
# order, so sort them to make the row order of the CSV deterministic.
fileList = sorted(glob.glob(resultPath + '//' +'*json'))
weather = []

# Pull one row of daily figures out of every daily summary entry.
for jsonPath in fileList:
    # The with-block closes each file right away instead of leaving
    # one open handle per downloaded day to the garbage collector.
    with open(jsonPath) as jsonFile:
        config = json.load(jsonFile)
    for summary in config['history']['dailysummary']:
        day = summary['date']
        dataset = (
            day['year'] + "-" + day['mon'] + "-" + day['mday'],
            summary['precipm'],
            summary['maxtempm'],
            summary['meantempm'],
            summary['mintempm'],
        )
        weather.append(dataset)

# Make sure the output folder for the combined CSV exists.
dataPath = relativePath+"//"+"final//WeatherData//"
funCheckDir(dataPath)

# Write the header and all collected rows; the with-block closes the file.
with open(dataPath+"/"+'weather.csv', 'w', newline='') as fp:
    csvWrite = csv.writer(fp, delimiter=',')
    csvWrite.writerow(["Date", "Precipitation", "Max Temp", "Mean Temp", "Min Temp"])
    csvWrite.writerows(weather)

#### Stock API DATA Storing

In [6]:
# Collect every raw stock JSON file from the stock folder.
fileListStock = glob.glob(stockPath + '//' +'*.json')

# Make sure the output folder for the per-company CSV files exists.
sPath = relativePath+"//"+"final//stockData//"
funCheckDir(sPath)

# Convert each JSON file into one CSV named after its dataset code.
for stockFile in fileListStock:
    # The with-block closes the file right away instead of leaking the handle.
    with open(stockFile) as jsonFile:
        config = json.load(jsonFile)
    fName = config['dataset']['dataset_code']
    # Add a 'company_code' column as the second column of the header ...
    header = config['dataset']['column_names']
    colName = header[:1] + ['company_code'] + header[1:]
    # ... and put the dataset code at the same position in every data row.
    stockData = [row[:1] + [fName] + row[1:] for row in config['dataset']['data']]
    # Write the header and all rows; the with-block closes the file.
    with open(sPath+"/"+fName+'.csv', 'w', newline='') as fp:
        csvWrite = csv.writer(fp, delimiter=',')
        csvWrite.writerow(colName)
        csvWrite.writerows(stockData)

#### NOTE - The stock data API did not provide stock data for the wind company,
#### so I used the Yahoo Finance website to download it.

In [17]:
# CSV export of the VWS.CO price table from Yahoo Finance.
windUrl = 'http://chart.finance.yahoo.com/table.csv?s=VWS.CO&a=0&b=1&c=2012&d=3&e=16&f=2017&g=d&ignore=.csv'
# Make sure the output folder for the extra download exists.
savePath = relativePath+"//"+"final//extra//"
funCheckDir(savePath)


# Without a timeout a stalled request would block the notebook forever.
vwsData = requests.get(windUrl, timeout=30)
# Stop here on an HTTP error rather than saving an error page as VWS.csv.
vwsData.raise_for_status()
# Write the body as raw bytes so the CSV is stored exactly as served.
with open(savePath+"//VWS.csv", 'wb') as file:
    file.write(vwsData.content)