# ITNPBD2 Representing and Manipulating Data

## Input and Output

# Simplest output: print

In [None]:
# print() writes its arguments to standard output, followed by a newline.
print("Hello")

## In a notebook, a nicer format can be had with `display`

In [None]:
import pandas as pd
# index_col=0 tells pandas to use the first column of the CSV as the row index.
loans=pd.read_csv("data/loans.csv",index_col=0)
display(loans.head())  # display() is available in notebooks; shows the first rows as a table
#print(loans.head()) # Plain print() also works, but is not nicely formatted

# Keyboard Input
- Use `input()`
- Returns a string

In [None]:
print("Type something and hit return")
x = input()  # waits for the user to hit return; the result is always a string
print("You typed",x)

## You will need to cast it to other types

In [None]:
print("Type a number")
x = input()
print("And another")
y = input()
# input() returns strings, so cast to float before doing arithmetic.
# float() raises ValueError if the text typed is not a valid number.
print("They add up to ",float(x) + float(y))
print("This would be wrong: they add up to",x+y) # + with strings concatenates

## Convert to a list
- Lots happening in that list comprehension!
- `split` converts the string to a list of strings, splitting on `,`
- We could do it in more steps, of course

In [None]:
print("Enter a list of comma separated numbers")
x = input()
# Split the text on commas, convert each piece to a float, then sum the resulting list.
print("They add up to ",sum([float(n) for n in x.split(",")]))

## Going the other way, from list to comma separated string:

In [None]:
x = ['a','b','c']
# join() is called on the separator and glues the list items together with it.
print(",".join(x))

In [None]:
x = [1, 2, 3]
# Doesn't work for numbers: join() only accepts strings, so this line raises a TypeError
print(",".join(x))

In [None]:
# Convert every item to a string first, then join.
# Expects x to still hold the list of numbers assigned in an earlier cell.
print(",".join(map(str, x)))
# or
print(",".join([str(n) for n in x]))

# Opening a File
- `open(path, mode)` to open a file
- `mode` is `r` for read, `w` for write, `a` for append (there are more, see later)
- `close` when done

In [None]:
f = open("data/loans.csv", 'r')  # 'r' opens the file for reading
f.close()  # release the file handle once finished with it

## Avoid the need to close and keep all the code operating on the file together like this:

In [None]:
with open("data/loans.csv") as f:
    # do stuff with f; the file is closed automatically when this block ends
    pass  # placeholder: a with-block needs at least one statement to be valid Python

# Reading from the file
 ## Read it all into a string

In [None]:
with open("data/loans.csv") as f:
    s = f.read()  # read() with no argument returns the rest of the file as one string
print(s)

## Read one row

In [None]:
with open("data/loans.csv") as f:
    s = f.readline()  # one line of text, including its trailing newline character
print(s)

## Read one row at a time until the file ends
- `readline` returns an empty string (which counts as false) when it reaches the end of the file
- So you could do this

In [None]:
lines = 0
with open("data/loans.csv") as f:
    s = f.readline()
    # At end of file readline() returns "" (an empty, falsy string), which ends the loop.
    while s:
        lines += 1
        s = f.readline()

print("Read {} rows".format(lines))
#print(f"Read {lines} rows") # The Jupyter server in lecture theaters doesn't have this

## But there is a nicer way

In [None]:
lines = 0
with open("data/loans.csv") as f:
    # Iterating over a file object yields one line at a time until the file ends.
    for l in f:
        lines += 1
print("Read {} rows".format(lines))
#print(f"Read {lines} rows")

## If the file is huge and you want to peek without loading it all
- First 5 lines

In [None]:
# Peek at the first 5 lines without reading the whole file into memory.
with open("data/loans.csv") as f:
    for i in range(5):
        line = f.readline()
        if not line:
            # Empty string means end of file: the file has fewer than 5 lines.
            break
        # Each line already ends with "\n", so suppress print's own newline
        # to avoid double-spaced output.
        print(line, end="")

## Second column of first 100 lines, skipping the header

In [None]:
from itertools import islice

# Collect the second column of (up to) the first 100 data rows.
with open("data/loans.csv",'r') as f:
    f.readline() # Skip header
    col = []
    # islice stops early if the file has fewer than 100 rows, instead of
    # calling split on the empty string readline() returns at end of file.
    for line in islice(f, 100):
        # Strip the line ending so a last-column value does not carry "\n".
        fields = line.rstrip("\n").split(",")
        if len(fields) > 1:  # skip blank or single-column rows
            col.append(fields[1])

print(col)

# Directories
- Find out the current working directory with `os.getcwd()`

In [None]:
import os
print(os.getcwd())  # the current working directory, as a string

# List a directory
- Using `os.walk`
- Lists each folder and each file in a given folder
- Then enters each folder and does the same in there and so on

In [None]:
root = os.getcwd()

dw = os.walk(root)
# dw is a generator, so we iterate over it:
# r = the folder currently being visited, d = its sub-directories, f = its files
for r, d, f in dw:
    display(r, d, f)


## Just the `.txt` files in a given folder
- We only want to see the current folder, so we just get the first file list
- `_` means ignore this variable when unpacking the next item from the generator
- `next` gets the next item from the generator

In [None]:
dw = os.walk("data")
# The first item produced by the walk describes "data" itself;
# we ignore its path and sub-folder list and keep only the file names.
_, _, f = next(dw)
for fn in f:
    # endswith() states the intent directly and avoids a hard-coded slice length
    if fn.endswith('.txt'):
        print(fn)

# or

dw = os.walk("data")
_, _, f = next(dw)
txts = [fn for fn in f if fn.endswith('.txt')]
print(txts)

# Writing to a file
- Open with `w` or `a`
- Run it twice - no change in file contents as `w` starts an empty file
- Now change the mode to `a` for append and re-run

In [None]:
# 'w' starts from an empty file each time it is opened; write() does not add a newline itself.
with open("data/newfile.txt",'w') as f:
    f.write("Some text\n")

# Read the file back to show what was written.
with open("data/newfile.txt",'r') as f:
    a = f.read()
print(a)

## What can we write?
- Strings only - convert everything else like you do when printing

In [None]:
a = 35
with open("data/newfile.txt",'w') as f:
    # write() only accepts strings, so the number is formatted into the text first
    f.write("Writing {a}".format(a=a))
#    f.write(f"Writing {a}")
    
with open("data/newfile.txt",'r') as f:
    a = f.read()  # note: a is rebound here from the number 35 to the file's contents
print(a)

## Arrays and CSV
- We saw above how to convert a list to a separated string with `join` and `str`
- There is also a `csv` library
- Use this to specify how to write a list as a row of text in a file
- Specify separator, quote character and level of quoting
  - Try `QUOTE_NONNUMERIC` or `QUOTE_ALL` or `QUOTE_NONE`

In [None]:
import csv

line1 = [1, 2, 3, "a", "a,b"]
line2 = [4, 5, 6, "b", "c"]
# newline='' lets the csv module control line endings itself.
with open('data/newfile.csv', 'w', newline='') as csvfile:
    # With QUOTE_NONE nothing is quoted, so a field that contains the
    # delimiter (like "a,b") must be escaped instead. Without an escapechar
    # the writer raises csv.Error ("need to escape, but no escapechar set").
    # The other quoting levels simply quote such fields.
    writer = csv.writer(csvfile, delimiter=',',
                        quotechar='"', quoting=csv.QUOTE_NONE,
                        escapechar='\\')
    writer.writerow(line1)
    writer.writerow(line2)

# Read the file back to see exactly what was written.
with open("data/newfile.csv",'r') as f:
    a = f.read()
print(a)


# Pandas and NumPy have Their Own
- Use the built-in file methods of these libraries where available; they are better and faster
## Pandas `read_csv` and `to_csv` or `to_json`
## NumPy `loadtxt` and `savetxt`

## Other exports from Pandas:

In [None]:
# A small 3x3 table to export; expects pandas to have been imported as pd earlier in the notebook.
df = pd.DataFrame([[1, 2, 3],
                  [4, 5, 6],
                  [7, 8, 9]])
df.to_clipboard() # Paste somewhere now! Doesn't work on the lecture theater server
with open('data/table.html', 'w') as f:
    f.write(df.to_html())  # called with no arguments, to_html() returns the table as an HTML string

In [None]:
   0  1  2
0  1  2  3
1  4  5  6
2  7  8  9