<a href="https://colab.research.google.com/github/shivanshmangla/numpy/blob/main/file_handling.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

https://jovian.ml/aakashns/python-os-and-filesystem

## Interacting with the OS and filesystem

The `os` module in Python provides many functions for interacting with the OS and the filesystem. Let's import it and try out some examples.

In [54]:
import os
import numpy as np


# We can check the present working directory using the `os.getcwd` function.

os.getcwd()

'/content'

In [55]:
os.listdir('.') # relative path
#shows hidden files

['.config',
 'climate.txt',
 'drive',
 'data',
 'climate_results.txt',
 'sample_data']

In [56]:
# You can create a new directory using `os.makedirs`. Let's create a new directory called `data`, where we'll later download some files.

os.makedirs('./data', exist_ok=True)

#Let's verify that the directory was created and is currently empty.
print('data' in os.listdir('.'))

os.listdir('./data')  # empty as abhi kuchh nhi daala

True


['loans3.txt', 'loans2.txt', 'loans1.txt']

###Let us download some files into the `data` directory using the `urllib` module.

In [118]:
from urllib.request import urlretrieve
url1 = 'https://gist.githubusercontent.com/aakashns/257f6e6c8719c17d0e498ea287d1a386/raw/7def9ef4234ddf0bc82f855ad67dac8b971852ef/loans1.txt'
url2 = 'https://gist.githubusercontent.com/aakashns/257f6e6c8719c17d0e498ea287d1a386/raw/7def9ef4234ddf0bc82f855ad67dac8b971852ef/loans2.txt'
url3 = 'https://gist.githubusercontent.com/aakashns/257f6e6c8719c17d0e498ea287d1a386/raw/7def9ef4234ddf0bc82f855ad67dac8b971852ef/loans3.txt'
urlretrieve(url1, './data/loans1.txt')
urlretrieve(url2, './data/loans2.txt')
urlretrieve(url3, './data/loans3.txt')

('./data/loans3.txt', <http.client.HTTPMessage at 0x7f7abfb7ea00>)

In [41]:
# Let's verify that the files were downloaded.
os.listdir('./data')

['loans3.txt', 'loans2.txt', 'loans1.txt']

In [119]:
file1 = open('./data/loans1.txt',mode = 'r')
file_contents = file1.read()
print(file_contents)

file1.close()

amount,duration,rate,down_payment
100000,36,0.08,20000
200000,12,0.1,
628400,120,0.12,100000
4637400,240,0.06,
42900,90,0.07,8900
916000,16,0.13,
45230,48,0.08,4300
991360,99,0.08,
423000,27,0.09,47200


In [120]:
with open('./data/loans2.txt',mode='r') as file2:
  file2_contents = file2.read()
  print(file2_contents)

amount,duration,rate,down_payment
828400,120,0.11,100000
4633400,240,0.06,
42900,90,0.08,8900
983000,16,0.14,
15230,48,0.07,4300


In [48]:
file2.read()

ValueError: ignored

In [121]:
with open('./data/loans3.txt',mode='r') as file3:
  file3_contents = file3.readlines()
  print(file3_contents)

['amount,duration,rate,down_payment\n', '45230,48,0.07,4300\n', '883000,16,0.14,\n', '100000,12,0.1,\n', '728400,120,0.12,100000\n', '3637400,240,0.06,\n', '82900,90,0.07,8900\n', '316000,16,0.13,\n', '15230,48,0.08,4300\n', '991360,99,0.08,\n', '323000,27,0.09,4720010000,36,0.08,20000\n', '528400,120,0.11,100000\n', '8633400,240,0.06,\n', '12900,90,0.08,8900']


In [58]:
file3_contents[0].strip()   #removes the newline charecters

'amount,duration,rate,down_payment'

In [122]:
def parse_headers(header_line):
  return header_line.strip().split(',')

headers = parse_headers(file3_contents[0])

print(headers)

['amount', 'duration', 'rate', 'down_payment']


In [62]:
def parse_values(data_line):
  values = []
  for item in data_line.strip().split(','):
    values.append(float(item))
  return values

parse_values(file3_contents[2])

ValueError: ignored

In [123]:
def parse_values(data_line):
  values=[]
  for item in data_line.strip().split(','):
    if item == '':
      values.append(0.0)
    else:
      values.append(float(item))
  return values

print(parse_values(file3_contents[2]))

[883000.0, 16.0, 0.14, 0.0]


In [None]:
def create_item_dict(values , headers):
  result = {}
  for value,header in zip(values,headers):
    result[header]=value
  return result

values1 = parse_values(file3_contents[1])

print(create_item_dict(values1,headers))

values2 = parse_values(file3_contents[2])

print(create_item_dict(values2,headers))

In [113]:
def read_csv(path):
  result = []
  with open(path,mode='r') as f:
    f_contents = f.readlines()
    header = parse_headers(f_contents[0])
    for data_line in f_contents[1:]:
      value = parse_values(data_line)
      item_dict = create_item_dict(value,header)
      result.append(item_dict)
  return result

In [124]:
def parse_headers(header_line):
    return header_line.strip().split(',')

def parse_values(data_line):
    values = []
    for item in data_line.strip().split(','):
        if item == '':
            values.append(0.0)
        else:
            try:
                values.append(float(item))
            except ValueError:
                values.append(item)
    return values

def create_item_dict(values, headers):
    result = {}
    for value, header in zip(values, headers):
        result[header] = value
    return result

def read_csv(path):
    result = []
    # Open the file in read mode
    with open(path, 'r') as f:
        # Get a list of lines
        lines = f.readlines()
        # Parse the header
        headers = parse_headers(lines[0])
        # Loop over the remaining lines
        for data_line in lines[1:]:
            # Parse the values
            values = parse_values(data_line)
            # Create a dictionary using values & headers
            item_dict = create_item_dict(values, headers)
            # Add the dictionary to the result
            result.append(item_dict)
    return result

In [125]:
read_csv('./data/loans2.txt')

[{'amount': 828400.0,
  'duration': 120.0,
  'rate': 0.11,
  'down_payment': 100000.0},
 {'amount': 4633400.0, 'duration': 240.0, 'rate': 0.06, 'down_payment': 0.0},
 {'amount': 42900.0, 'duration': 90.0, 'rate': 0.08, 'down_payment': 8900.0},
 {'amount': 983000.0, 'duration': 16.0, 'rate': 0.14, 'down_payment': 0.0},
 {'amount': 15230.0, 'duration': 48.0, 'rate': 0.07, 'down_payment': 4300.0}]

In [126]:
import math

def loan_emi(amount, duration, rate, down_payment=0):
    """Calculates the equal montly installment (EMI) for a loan.
    
    Arguments:
        amount - Total amount to be spent (loan + down payment)
        duration - Duration of the loan (in months)
        rate - Rate of interest (monthly)
        down_payment (optional) - Optional intial payment (deducted from amount)
    """
    loan_amount = amount - down_payment
    try:
        emi = loan_amount * rate * ((1+rate)**duration) / (((1+rate)**duration)-1)
    except ZeroDivisionError:
        emi = loan_amount / duration
    emi = math.ceil(emi)
    return emi

In [128]:
loans2 = read_csv('./data/loans2.txt')
print((loans2))

for loan in loans2:
    loan['emi'] = loan_emi(loan['amount'], 
                           loan['duration'], 
                           loan['rate']/12, # the CSV contains yearly rates
                           loan['down_payment'])

[{'amount': 828400.0, 'duration': 120.0, 'rate': 0.11, 'down_payment': 100000.0}, {'amount': 4633400.0, 'duration': 240.0, 'rate': 0.06, 'down_payment': 0.0}, {'amount': 42900.0, 'duration': 90.0, 'rate': 0.08, 'down_payment': 8900.0}, {'amount': 983000.0, 'duration': 16.0, 'rate': 0.14, 'down_payment': 0.0}, {'amount': 15230.0, 'duration': 48.0, 'rate': 0.07, 'down_payment': 4300.0}]


In [134]:
def compute_emis(loans):
  for loan in loans:
    loan['emi'] = loan_emi(loan['amount'],
                         loan['duration'],
                         loan['rate'],
                         loan['down_payment'])
  return loans

## Writing to files

Now that we have performed some processing on the data, it would be good to write the results back to a CSV file. We can create/open a file in `w` mode using `open` and write to it using the `.write` method. The string `format` method will come in handy here.

In [130]:
loans2 = read_csv('./data/loans2.txt')

compute_emis(loans2)

with open('./data/loans2.txt',mode = 'w') as f:
  for loan in loans2:
    f.write('{},{},{},{},{}\n'.format(
        loan['amount'],
        loan['duration'],
        loan['rate'],
        loan['down_payment'],
        loan['emi']
    ))

In [144]:
def write_csv(item,path):
  with open(path , mode = 'w') as f:
    if len(item)==0:
      return
    
    headers = list(item[0].keys())
    f.write(','.join(headers) + '\n')

    for item in item:
      values = []
      for header in headers:
        values.append(str(item.get(header,"")))
      f.write(','.join(values) + "\n")


In [135]:
loans3= read_csv('./data/loans3.txt')
print(loans3)

print(compute_emis(loans3))

[{'amount': 45230.0, 'duration': 48.0, 'rate': 0.07, 'down_payment': 4300.0}, {'amount': 883000.0, 'duration': 16.0, 'rate': 0.14, 'down_payment': 0.0}, {'amount': 100000.0, 'duration': 12.0, 'rate': 0.1, 'down_payment': 0.0}, {'amount': 728400.0, 'duration': 120.0, 'rate': 0.12, 'down_payment': 100000.0}, {'amount': 3637400.0, 'duration': 240.0, 'rate': 0.06, 'down_payment': 0.0}, {'amount': 82900.0, 'duration': 90.0, 'rate': 0.07, 'down_payment': 8900.0}, {'amount': 316000.0, 'duration': 16.0, 'rate': 0.13, 'down_payment': 0.0}, {'amount': 15230.0, 'duration': 48.0, 'rate': 0.08, 'down_payment': 4300.0}, {'amount': 991360.0, 'duration': 99.0, 'rate': 0.08, 'down_payment': 0.0}, {'amount': 323000.0, 'duration': 27.0, 'rate': 0.09, 'down_payment': 4720010000.0}, {'amount': 528400.0, 'duration': 120.0, 'rate': 0.11, 'down_payment': 100000.0}, {'amount': 8633400.0, 'duration': 240.0, 'rate': 0.06, 'down_payment': 0.0}, {'amount': 12900.0, 'duration': 90.0, 'rate': 0.08, 'down_payment': 8

In [145]:
write_csv(loans3,'./data/emis3.txt')

In [146]:
with open('./data/emis3.txt',mode='r') as f:
  print(f.read())

amount,duration,rate,down_payment,emi
45230.0,48.0,0.07,4300.0,2981
883000.0,16.0,0.14,0.0,140941
100000.0,12.0,0.1,0.0,14677
728400.0,120.0,0.12,100000.0,75409
3637400.0,240.0,0.06,0.0,218245
82900.0,90.0,0.07,8900.0,5192
316000.0,16.0,0.13,0.0,47851
15230.0,48.0,0.08,4300.0,897
991360.0,99.0,0.08,0.0,79348
323000.0,27.0,0.09,4720010000.0,-470717536
528400.0,120.0,0.11,100000.0,47125
8633400.0,240.0,0.06,0.0,518005
12900.0,90.0,0.08,8900.0,321

