# Alternative Fuel Stations
---

**Purpose:**

Using file operations from the Python standard library

**Data Source:**

https://data.ny.gov/

In [None]:
# system libraries
import os
import sys

# report the interpreter version and the current working directory
python_version = sys.version
working_directory = os.getcwd()
print('Python Information', python_version)
print('This is your current directory', working_directory)

In [None]:
# datetime libraries
import datetime

# assign current date and time
currentDate = datetime.date.today()
currentTime = datetime.datetime.now()

# check datetime information: default ISO rendering first, then two
# explicit formats produced by calling strftime on the objects themselves
print('Today is {}'.format(currentDate))
print('Today is', currentDate.strftime('%m/%d/%Y'))
print('The time is', currentTime.strftime('%H:%M:%S'))

## Featured Libraries

In [None]:
import csv
import glob
import zipfile
import random
import re
from collections import Counter

In [None]:
!pip install wget

In [None]:
import wget

In [None]:
# CSV download link on data.ny.gov; the query string requests a sorted download
url = 'https://data.ny.gov/api/views/bpkx-gmh7/rows.csv?accessType=DOWNLOAD&sorting=true'
print(url)

In [None]:
# download the file; the returned value is kept as the local file name that
# every later cell opens
filename = wget.download(url=url)

In [None]:
# show the name under which the download was saved
print(filename)

## Using Open (Readlines)

In [None]:
# first line becomes the header, every remaining line becomes a data entry
# (each string keeps its trailing line ending)
with open(filename, "r") as f:
    header, data = f.readline(), f.readlines()

In [None]:
# split the header line into column names; readline() keeps the line ending,
# so strip it first or the last column name would end with a newline
header.rstrip("\r\n").split(",")

In [None]:
# number of lines read after the header
len(data)

In [None]:
# keep every raw line that mentions "tesla" anywhere, ignoring case
tesla = [line for line in data if "tesla" in line.lower()]

In [None]:
# number of lines that matched the "tesla" filter
len(tesla)

In [None]:
# first five matching lines
tesla[:5]

In [None]:
# last five matching lines
tesla[-5:]

In [None]:
# save the header followed by the matching lines; the lines still carry
# their own line endings, so they are written back unchanged
with open(file="Tesla.csv", mode="w") as f:
    f.writelines([header, *tesla])

## Read & Write Together

In [None]:
source_file = "Tesla.csv"
target_file = "My_Alternative_Selections.csv"

# copy the header line as-is, then only the lines containing the search text
with open(source_file, mode='r') as source, open(target_file, mode='w') as target:
    target.write(source.readline())
    target.writelines(line for line in source if "Airport" in line)

# try "Airport" to see 2 different files
# try "NY" to make identical files

## Compare files

In [None]:
# load the lines of each file without its header line
with open(source_file, mode='r') as f1:
    next(f1, None)  # discard the header line (no-op on an empty file)
    data1 = list(f1)

with open(target_file, mode='r') as f2:
    next(f2, None)  # discard the header line (no-op on an empty file)
    data2 = list(f2)

# True only when both files hold the same lines in the same order
data1 == data2

In [None]:
# lines present in the source file but missing from the target file
set(data1).difference(data2)

In [None]:
# lines present in the target file but missing from the source file
set(data2).difference(data1)

In [None]:
# lines that appear in both files
set(data1) & set(data2)

In [None]:
# same intersection with the operands swapped (the result is identical)
set(data2) & set(data1)

## Using CSV Reader

In [None]:
# parse the whole file with the csv module (newline="" lets the csv reader
# handle line endings itself), then separate the header row from the records
with open(file=filename, newline="", encoding='latin1', mode="r") as f:
    data = list(csv.reader(f, delimiter=","))
header, data = data[0], data[1:]

In [None]:
# report how many records were parsed (header excluded)
row_total = len(data)
print('Total Rows: {:,}'.format(row_total))

In [None]:
# preview the first five parsed records
for row in data[:5]:
    print(row)

In [None]:
print('Checking a record...')
# pick one record at random and list each value next to its column name
sample = random.choice(data)
for position, value in enumerate(sample):
    print(position, header[position], ":", value)

In [None]:
subset_data = ['Tesla']

# for each keyword, collect the records whose second column contains it
tesla_list = []
for keyword in subset_data:
    for row in data:
        if keyword in row[1]:
            tesla_list.append(row)

# pair each column name with the matching value of the first hit
list(zip(header, tesla_list[0]))

In [None]:
print('\nChecking the fields...')
# for every column, count how many distinct values it holds
for e, z in enumerate(header):
    distinct_values = {row[e] for row in data}
    print("Index # {} for {} has {:,} records".format(e, z, len(distinct_values)))

In [None]:
print('\nChecking some values...')
# ten most frequent values found in column index 6
value_counts = Counter(row[6] for row in data)
for k, v in value_counts.most_common(10):
    print(k, ">>> {:,}".format(v))

In [None]:
# write the header plus every record whose column index 6 equals '10019'
with open(file='alt_energy.csv', newline='', encoding='utf-8', mode='w') as f:
    writer = csv.writer(f)
    writer.writerow(header)
    for row in data:
        if row[6] == '10019':
            writer.writerow(row)

In [None]:
# distinct values found in the first column of the records
fuel_type_code = {row[0] for row in data}
fuel_type_code

In [None]:
# group the records by their first column in a single pass, instead of
# rescanning the whole data set once per code
rows_by_code = {}
for d in data:
    rows_by_code.setdefault(d[0], []).append(d)

# write one CSV per code: header first, then that code's records in their
# original order; utf-8 is set explicitly (as for the other CSV outputs) so
# the result does not depend on the platform's default encoding
for code in fuel_type_code:
    with open('energy_type_code_{}.csv'.format(code), mode='w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(header)
        writer.writerows(rows_by_code.get(code, []))

In [None]:
# find the per-code CSV files in the current directory by name pattern
ftc_files = glob.glob('energy_type_code*.csv')
ftc_files

In [None]:
# bundle every per-code CSV into a single archive
with zipfile.ZipFile(file='energy_type_codes.zip', mode='w') as archive:
    for csv_path in ftc_files:
        archive.write(csv_path)

In [None]:
# list the archive members, then unpack them into a separate folder
with zipfile.ZipFile(file='energy_type_codes.zip', mode='r') as archive:
    members = archive.namelist()
    print(members)
    archive.extractall('energy_type_codes_extract')

## Using CSV DictReader

In [None]:
# Load every record as a dict keyed by the column names in `header`.
# The field names are passed to this reader instance only: assigning to
# csv.DictReader.fieldnames would replace the class-level property for every
# reader and stop the header row from being consumed, so it would come back
# as the first "record".
with open(file=filename, mode="r", newline="") as csvfile:
    reader = csv.DictReader(csvfile, fieldnames=header)
    next(reader, None)  # skip the header row; its names are already in `header`
    data = list(reader)

In [None]:
# number of records loaded through DictReader
len(data)

In [None]:
# container type holding the records
type(data)

In [None]:
# type of a single record (the one at index 1)
type(data[1])

In [None]:
# display the record at index 1
data[1]

In [None]:
# print every column name and value of the record at index 1
record = data[1]
for field, value in record.items():
    print(field, ">>>", value)

In [None]:
# keep only the records whose 'ZIP' field is exactly "11801"
my_zips = [record for record in data if record['ZIP'] == "11801"]

In [None]:
# first four selected records
my_zips[:4]

In [None]:
# write the selected records as CSV, with columns in the order given by `header`
with open('my_zips.csv', 'w', newline="", encoding='utf-8') as out_file:
    dict_writer = csv.DictWriter(out_file, fieldnames=header)
    dict_writer.writeheader()
    dict_writer.writerows(my_zips)

## Using Open & Regex

In [None]:
# read the whole file as one string
with open(filename, 'r', newline='', encoding='latin1') as f:
    data = f.read()

# split the text into runs of word characters and show the 20 most frequent
words = re.findall(r'\w+', data)
word_counts = Counter(words)
word_counts.most_common(20)

In [None]:
# count the occurrences of a few names in the lower-cased text
lowered_text = data.lower()
features = re.findall('(tesla|con ed|national grid|pseg)', lowered_text)
Counter(features)

In [None]:
# every CSV in the current directory whose name contains "energy", sorted by name
files = sorted(glob.glob("*energy*.csv"))
files

In [None]:
query_item = "Kings"

# map each file name to the number of times the query occurs in it;
# files without any occurrence are left out
collector = {}
for file in files:
    with open(file, newline='', encoding='latin1') as f:
        data = f.read()
    # a single findall pass both detects and counts the matches
    matches = re.findall(pattern=query_item, string=data)
    if matches:
        collector[file] = len(matches)

for k, v in collector.items():
    print(k, ">>> {:,}".format(v))

## Delete files

In [None]:
# list the per-code CSV files that are about to be deleted
files = sorted(glob.glob("*energy_type_code_*.csv"))
files

In [None]:
# delete each listed file from disk
for stale_path in files:
    os.remove(stale_path)

In [None]:
# run the same search again; an empty list means the files were removed
files = sorted(glob.glob("*energy_type_code_*.csv"))
files