# Data Analysis with Python Course

# Read and Write Files
# interact with OS

## freeCodeCamp.org YouTube

In [1]:
import os

# Check the current working directory; the relative paths used below
# (such as './data') are resolved against it.
os.getcwd()

'/Users/ryan/UofL Health Data/Learning Python'

In [2]:
# List the entries (files and sub-directories) of the current working directory.
os.listdir()

['Variables and Data Types.ipynb',
 'climate.txt',
 'Functions.ipynb',
 'Numpy.ipynb',
 'Read and Write Files.ipynb',
 'venv',
 '.git',
 'climate_results.txt',
 'Branching and Loops.ipynb',
 '.idea']

In [3]:
# Create the data/ directory. exist_ok=True suppresses the error that would
# otherwise be raised when the directory already exists, so the cell can be re-run.
os.makedirs('./data', exist_ok=True)

In [7]:
# download the sample loan files into data/
import urllib.request

# All three files come from the same pinned gist revision; only the file name differs.
GIST_RAW_URL = 'https://gist.githubusercontent.com/aakashns/257f6e6c8719c17d0e498ea287d1a386/raw/7def9ef4234ddf0bc82f855ad67dac8b971852ef/'

url1 = GIST_RAW_URL + 'loans1.txt'
url2 = GIST_RAW_URL + 'loans2.txt'
url3 = GIST_RAW_URL + 'loans3.txt'

# Save each file under data/ using the file name at the end of its URL.
for url in (url1, url2, url3):
    urllib.request.urlretrieve(url, './data/' + url.rsplit('/', 1)[-1])

# Confirm the downloaded files are present.
os.listdir('./data')

['loans2.txt', 'loans3.txt', 'loans1.txt']

In [13]:
# Create a file object (file pointer) for reading the contents.
# An open file keeps holding resources until it is closed; file1 is closed
# explicitly in a later cell.
file1 = open('./data/loans1.txt', mode='r')

# Read the entire contents of the file into one string.
file1_c = file1.read()

print(file1_c)

amount,duration,rate,down_payment
100000,36,0.08,20000
200000,12,0.1,
628400,120,0.12,100000
4637400,240,0.06,
42900,90,0.07,8900
916000,16,0.13,
45230,48,0.08,4300
991360,99,0.08,
423000,27,0.09,47200


In [14]:
# Close the file that was opened with open() above to release it.
file1.close()

In [15]:
# A with-block closes the file automatically when the block exits,
# even if an error is raised inside it, so no explicit close() is needed.
with open('./data/loans2.txt', mode='r') as file2:
    file2_c = file2.read()
    print(file2_c)

amount,duration,rate,down_payment
828400,120,0.11,100000
4633400,240,0.06,
42900,90,0.08,8900
983000,16,0.14,
15230,48,0.07,4300


In [17]:
# Read the file as a list of lines; each element keeps its trailing newline
# (except possibly the last line of the file).
with open('./data/loans3.txt', mode = 'r') as file3:
    file3_lines = file3.readlines()

In [20]:
# Display the raw lines read from loans3.txt.
# NOTE(review): one row in the output below has seven fields instead of four,
# which looks like two records joined by a missing newline - confirm against the source file.
file3_lines

['amount,duration,rate,down_payment\n',
 '45230,48,0.07,4300\n',
 '883000,16,0.14,\n',
 '100000,12,0.1,\n',
 '728400,120,0.12,100000\n',
 '3637400,240,0.06,\n',
 '82900,90,0.07,8900\n',
 '316000,16,0.13,\n',
 '15230,48,0.08,4300\n',
 '991360,99,0.08,\n',
 '323000,27,0.09,4720010000,36,0.08,20000\n',
 '528400,120,0.11,100000\n',
 '8633400,240,0.06,\n',
 '12900,90,0.08,8900']

In [21]:
# strip() removes leading and trailing whitespace, which includes the
# newline character at the end of each line.
file3_lines[0].strip()

'amount,duration,rate,down_payment'

In [23]:
# read contents of file and create a dictionary

def parse_headers(header_line):
    """Split a CSV header line into a list of column names.

    Args:
        header_line: The header row of the file, with or without a trailing
            newline.

    Returns:
        list[str]: One name per comma-separated field, in file order, with
        surrounding whitespace removed from every name.
    """
    # Strip each name individually so a header written as 'amount, rate'
    # does not yield keys with stray spaces such as ' rate'.
    return [name.strip() for name in header_line.strip().split(',')]

# Parse the first line of loans3.txt (the header row) into a list of column names.
headers = parse_headers(file3_lines[0])
headers

['amount', 'duration', 'rate', 'down_payment']

In [26]:
def parse_values(data_line):
    """Convert one comma-separated data line into a list of floats.

    Args:
        data_line: A data row, with or without a trailing newline.

    Returns:
        list[float]: One float per field, in file order. Fields that are
        empty or contain only whitespace are treated as missing and
        become 0.0.

    Raises:
        ValueError: If a non-blank field cannot be converted to a float.
    """
    # field.strip() is falsy for both '' and whitespace-only text, so a field
    # like ' ' counts as missing instead of reaching float() and raising.
    return [
        float(field) if field.strip() else 0.0
        for field in data_line.strip().split(',')
    ]

# Try the parser on the first data row, where every field is present.
parse_values(file3_lines[1])

[45230.0, 48.0, 0.07, 4300.0]

In [27]:
# This row ends with an empty field; the missing value comes back as 0.0.
parse_values(file3_lines[2])

[883000.0, 16.0, 0.14, 0.0]

In [29]:
def create_item_dict(values, head):
    """Pair each value with its column name to build one record.

    Pairing is positional and stops at the shorter of the two inputs, so
    surplus values or surplus column names are left out of the result.

    Args:
        values: Field values for one row, in column order.
        head: Column names, in the same order as ``values``.

    Returns:
        dict: Mapping of column name to the value at the same position.
    """
    return {column: field for field, column in zip(values, head)}

# Build one record from the second data row and the parsed header names.
create_item_dict(parse_values(file3_lines[2]), headers)

{'amount': 883000.0, 'duration': 16.0, 'rate': 0.14, 'down_payment': 0.0}

In [30]:
# read csv function
def read_csv(path):
    """Read a comma-separated file of numbers into a list of row dictionaries.

    The first line supplies the column names (via ``parse_headers``); every
    later non-blank line is converted with ``parse_values`` and turned into a
    dictionary with ``create_item_dict``.

    Args:
        path: Location of the file to read.

    Returns:
        list[dict]: One dictionary per data row, in file order. An empty file
        yields an empty list.

    Warns:
        UserWarning: When a data row's field count differs from the number of
        header columns. The row is still returned as before.
    """
    # Imported here so the function is self-contained within this cell.
    import warnings

    result = []
    # Open the file in read mode and get a list of lines
    with open(path, 'r') as f:
        lines = f.readlines()

    # An empty file has no header line to parse.
    if not lines:
        return result

    # Parse the header
    headers = parse_headers(lines[0])

    # Loop over the remaining lines; numbering starts at 2 because line 1 is the header.
    for line_number, data_line in enumerate(lines[1:], start=2):
        # A blank line would otherwise be parsed into a bogus one-field record.
        if not data_line.strip():
            continue
        # Parse the values
        values = parse_values(data_line)
        if len(values) != len(headers):
            # create_item_dict pairs fields with headers positionally and drops
            # whatever is left over, so surface the mismatch instead of hiding it.
            warnings.warn(
                '{}: line {} has {} fields, expected {}'.format(
                    path, line_number, len(values), len(headers)),
                stacklevel=2)
        # Create a dictionary using values & headers and add it to the result
        result.append(create_item_dict(values, headers))
    return result

In [31]:
# Parse the whole of loans3.txt into a list of dictionaries, one per data row.
read_csv('./data/loans3.txt')

[{'amount': 45230.0, 'duration': 48.0, 'rate': 0.07, 'down_payment': 4300.0},
 {'amount': 883000.0, 'duration': 16.0, 'rate': 0.14, 'down_payment': 0.0},
 {'amount': 100000.0, 'duration': 12.0, 'rate': 0.1, 'down_payment': 0.0},
 {'amount': 728400.0,
  'duration': 120.0,
  'rate': 0.12,
  'down_payment': 100000.0},
 {'amount': 3637400.0, 'duration': 240.0, 'rate': 0.06, 'down_payment': 0.0},
 {'amount': 82900.0, 'duration': 90.0, 'rate': 0.07, 'down_payment': 8900.0},
 {'amount': 316000.0, 'duration': 16.0, 'rate': 0.13, 'down_payment': 0.0},
 {'amount': 15230.0, 'duration': 48.0, 'rate': 0.08, 'down_payment': 4300.0},
 {'amount': 991360.0, 'duration': 99.0, 'rate': 0.08, 'down_payment': 0.0},
 {'amount': 323000.0,
  'duration': 27.0,
  'rate': 0.09,
  'down_payment': 4720010000.0},
 {'amount': 528400.0,
  'duration': 120.0,
  'rate': 0.11,
  'down_payment': 100000.0},
 {'amount': 8633400.0, 'duration': 240.0, 'rate': 0.06, 'down_payment': 0.0},
 {'amount': 12900.0, 'duration': 90.0, 'rate': 0.08, 'down_payment': 8900.0}]

In [32]:
# Keep the parsed records in a variable so they can be reused in later cells.
loans3 = read_csv('./data/loans3.txt')

In [34]:
# write files
# Column order of the output file: written once as the header row and used
# to pull the values out of each loan dictionary.
output_columns = ['amount', 'duration', 'rate', 'down_payment']

with open('./data/loans3_new.txt', 'w') as f:
    # Start with a header row so the file has the same layout as the loans
    # files it was derived from and can be loaded again with read_csv.
    f.write(','.join(output_columns) + '\n')
    for loan in loans3:
        f.write(','.join(str(loan[column]) for column in output_columns) + '\n')