# Creating and Opening Files

In [55]:
# Append a note to a text file; the `with` block closes the file automatically

def remember(thing):
    """Append ``thing`` followed by a newline to 'someText.txt'.

    Mode 'a' creates the file when it is missing and otherwise adds to its
    end ('w' would overwrite it, 'r' would only read it).
    """
    with open('someText.txt', mode='a') as notes:
        entry = thing + '\n'
        notes.write(entry)

# Prompt for a note only when this code runs as the main program
if __name__ == '__main__':
    note = input('What should I remember?')
    remember(note)

What should I remember? Suits


# Reading Files

In [56]:
# Open the notes file for reading; the handle is kept open because the
# following cells continue to use `someFile`
someFile = open('someText.txt')
# Read only the first 10 characters
someFile.read(10)

'Apples\nCoa'

In [57]:
# Move back to the start of the file, then read everything in one string
someFile.seek(0)
someFile.read()

'Apples\nCoats\nBoats\nSuits\n'

In [60]:
# Rewind, then collect every line (newline characters included) in a list
someFile.seek(0)
lines = someFile.readlines()
# This is the last use of the handle opened earlier: close it so the file
# does not stay open for the rest of the kernel session
someFile.close()
lines

['Apples\n', 'Coats\n', 'Boats\n', 'Suits\n']

# Importing Text Files

In [None]:
# Manual open/close, shown for comparison with the context-manager version
path = 'path'  # placeholder: replace with the path of the text file to read
file = open(path, mode='r')  # r=read, w=write
try:
    text = file.read()
finally:
    # Close the file even when reading fails
    file.close()

print(file.closed)  # prints True once the file has been closed
print(text)

In [None]:
# A context manager closes the file automatically when the block ends
path = 'path'
with open(path, mode='r') as file:
    contents = file.read()
    print(contents)

# Confirm the file was closed on leaving the `with` block
print(file.closed)

In [None]:
# Print the file one line at a time: each readline() call returns the next line
path = 'path'
with open(path) as file:
    first_line = file.readline()
    print(first_line)

# The file is closed once the `with` block has finished
print(file.closed)

In [None]:
# Saving files (`df` must be an existing DataFrame)
# Each output gets its own name and extension: to_excel() chooses its writer
# engine from the file extension, and sharing one name would make the second
# write replace the first
df.to_csv('new file name.csv')
df.to_excel('new file name.xlsx')

# Importing Flat Files using NumPy

In [None]:
import numpy as np

# Read columns 0, 2 and 15 of a comma-separated file as strings,
# skipping the first row
data = np.loadtxt(
    path,
    dtype=str,
    delimiter=',',
    skiprows=1,
    usecols=[0, 2, 15],
)

In [None]:
# Importing mixed data types using numpy
# names=True takes the field names from the header row; dtype=None lets
# NumPy infer a type for each column
np.genfromtxt('titanic.csv', delimiter=',', names=True, dtype=None)
# np.recfromcsv was removed from the top-level namespace in NumPy 2.0; this
# genfromtxt call reproduces its defaults (comma delimiter, lower-cased header
# names, inferred dtypes) and views the result as a record array
np.genfromtxt('titanic.csv', delimiter=',', names=True, dtype=None,
              case_sensitive='lower').view(np.recarray)

# Importing using Pandas

In [None]:
import pandas as pd

# Load the CSV file into a DataFrame
data = pd.read_csv(path)

# Turn one column into a NumPy array ('column' is a placeholder column name).
# The frame loaded above is called `data`, so read the column from it.
data_array = data['column'].to_numpy()
print(data_array)

In [None]:
# Importing an Excel file
import pandas as pd  # the library is named `pandas`

file = 'file_name.xlsx'
data = pd.ExcelFile(file)

print(data.sheet_names)  # list the sheets in the workbook
df = data.parse('sheet_name')  # import a sheet by name or by index
# Import the first sheet: keep only column 0, skip row 0 and rename the column.
# `usecols` is the current keyword; the older `parse_cols` was removed.
df = data.parse(0, usecols=[0], skiprows=[0], names=['Renamed Column'])

In [None]:
# Importing Stata files
import pandas as pd

# Read the Stata file into a DataFrame
data = pd.read_stata(filepath_or_buffer='file_name')

In [None]:
# Importing MATLAB files
import scipy.io

filename = 'file_name.mat'  # placeholder: replace with the path to the .mat file
mat = scipy.io.loadmat(filename)

# mat objects are dictionaries: keys = variable names,
# values = objects assigned to those variables

In [None]:
# Load a pickled Python object from disk
import pickle

# NOTE: unpickling can execute arbitrary code, so only load files from a
# trusted source. Pickle files are binary, hence the 'rb' mode.
with open('data.pkl', mode='rb') as file:
    d = pickle.load(file)

In [None]:
# Importing SAS files into a DataFrame with the sas7bdat package
# (`pd` is imported here but not referenced directly in this cell)
import pandas as pd
from sas7bdat import SAS7BDAT

# Open the SAS file with a context manager and convert its contents
with SAS7BDAT('file_name') as file:
    variable = file.to_data_frame()

In [None]:
# Importing HDF5...can scale to exabytes and is hierarchical
import h5py

# Open read-only inside a context manager so the file is closed afterwards
with h5py.File(path, 'r') as data:
    # groups in hdf5
    for key in data.keys():
        print(key)

    # subgroups
    for key in data['group'].keys():
        print(key)

    # values: indexing with an empty tuple reads the whole dataset
    # (the old `.value` attribute was removed in h5py 3.0)
    print(data['group']['subgroup'][()])

# Importing Relational Databases

In [None]:
from sqlalchemy import create_engine, inspect

engine = create_engine('sqlite:///file_name')

# to view tables: Engine.table_names() was removed in SQLAlchemy 2.0,
# so ask an inspector for the table names instead
tables = inspect(engine).get_table_names()
print(tables)

In [None]:
# Querying relational databases in Python
from sqlalchemy import create_engine, text
import pandas as pd

engine = create_engine('sqlite:///file_name')
con = engine.connect()
try:
    # text() wraps the raw SQL string; SQLAlchemy 2.0 no longer executes plain strings
    rs = con.execute(text('SELECT * FROM orders'))
    # Passing the column names at construction also works for an empty result
    df = pd.DataFrame(rs.fetchall(), columns=list(rs.keys()))
finally:
    # Release the connection even if the query fails
    con.close()
print(df.head())

# or using a context manager, which closes the connection automatically
from sqlalchemy import create_engine, text
import pandas as pd

engine = create_engine('sqlite:///file_name')
with engine.connect() as con:
    # WHERE, ORDER BY, INNER JOIN... can be added to filter the search.
    # text() wraps the raw SQL string; SQLAlchemy 2.0 no longer executes plain strings.
    rs = con.execute(text('SELECT * FROM orders'))
    # fetchall() returns every row; fetchmany(5) would limit the number of rows.
    # The column names come from the result; pass a custom list to rename them.
    df = pd.DataFrame(rs.fetchall(), columns=list(rs.keys()))

# Querying relational databases directly with pandas

In [None]:
from sqlalchemy import create_engine
import pandas as pd

# Run the query and build the DataFrame in a single pandas call
engine = create_engine('sqlite:///file_name')
df = pd.read_sql_query(sql='SELECT * FROM Orders', con=engine)

# Scraping the Web

In [None]:
# Download file
from urllib.request import urlretrieve

url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv'
# Call urlretrieve (rather than rebinding its name) so the file is actually
# downloaded; it returns the local file name and the response headers
local_path, headers = urlretrieve(url, 'winequality-white.csv')
print(local_path)

In [None]:
# HTTP requests to import files from the web using urllib
from urllib.request import urlopen, Request

url = 'http://wikipedia.org/'
request = Request(url)
# The context manager closes the response even if reading fails
with urlopen(request) as response:
    html = response.read()

In [None]:
# HTTP requests to import files from the web using the Requests package
import requests
from bs4 import BeautifulSoup

url = 'http://wikipedia.org/'
# A timeout keeps the call from hanging forever if the server never answers
request = requests.get(url, timeout=10)
text = request.text

# Name the parser explicitly so the result does not depend on which parsers
# happen to be installed
formatted = BeautifulSoup(text, 'html.parser')
pretty = formatted.prettify()
print(pretty)

# Print the target of every hyperlink on the page
for link in formatted.find_all('a'):
    print(link.get('href'))

In [None]:
# Loading JSON in Python
import json

# JSON files are UTF-8 by specification, so do not rely on the platform default
with open('file.json', 'r', encoding='utf-8') as json_file:
    json_data = json.load(json_file)

# Values may be numbers, lists, dicts, booleans or None, so convert them to
# text before concatenating
for key, value in json_data.items():
    print(key + ':' + str(value))

In [None]:
# Connecting to an API
import requests

url = 'http://omdbapi.com/?t=hackers'
# A timeout keeps the call from hanging forever if the server never answers
content = requests.get(url, timeout=10)
json_data = content.json()
# Values are not guaranteed to be strings, so convert them before concatenating
for key, value in json_data.items():
    print(key + ':' + str(value))