# ITNPBD2 Representing and Manipulating Data

## Input and Output

# Simplest output: print

In [None]:
print("Hello")

## In a notebook, a nicer format can be had with `display`

In [1]:
import pandas as pd
loans=pd.read_csv("data/loans.csv",index_col=0)  # ,index_col=0 to make index first column
display(loans.head())
#print(loans.head()) # Is not nicely formatted

Unnamed: 0_level_0,Age,Gender,Years at address,Employment status,Country,Current debt,Postcode,Income,Own home,CCJs,Loan amount,Outcome
Customer ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
945780,19,F,2,Unemployed,UK,0,TA3 7SH,45500,Rent,1,13234,Paid
747989,66,F,13,Unemployed,UK,1080,WV6 8SU,18000,Own,0,5561,Paid
790344,48,F,4,Self Employed,UK,3690,BT15 5HG,47500,Rent,1,28288,Paid
597668,67,F,47,Self Employed,UK,6560,GU10 3NH,36000,Mortgage,0,30199,Paid
794971,70,M,8,Self Employed,UK,9100,GL6 6UB,50500,Own,0,35078,Paid


# Keyboard Input
- Use `input()`
- Returns a string

In [None]:
print("Type something and hit return")
x = input()
print("You typed", x )

## You will need to cast it to other types

In [3]:
print("Type a number")
x = input()
print("And another")
y = input()
print("They add up to ",float(x) + float(y))
print("This would be wrong: they add up to",x+y) # + with strings concatenates

Type a number
11
And another
14
They add up to  25.0
This would be wrong: they add up to 1114


## Convert to a list
- Lots happening in that list comprehension!
- `split` converts the string to an array, splitting on `,`
- We could do it in more steps, of course

In [4]:
# print("Enter a list of comma separated numbers")
x = input("Enter a list of comma separated numbers")
print("They add up to ",sum([float(n) for n in x.split(",")]))

Enter a list of comma separated numbers11,12,13,14
They add up to  50.0


## Going the other way, from list to comma separated string:

In [5]:
x = ['a','b','c']
print(",".join(x))

a,b,c


In [6]:
x = [1, 2, 3]
# Doesn't work for numbers
print(",".join(x))

TypeError: sequence item 0: expected str instance, int found

In [7]:
print(",".join(map(str, x)))
# or
print(",".join([str(n) for n in x]))

1,2,3
1,2,3


# Opening a File
- `open(path, mode)` to open a file
- `mode` is `r` for read, `w` for write, `a` for append (there are more, see later)
- `close` when done

In [15]:
f = open("data/loans.csv", 'r')
#print(dir(f)) --> to see attributes
f.close()

## Avoid the need to close and keep all the code operating on the file together like this:

In [9]:
with open("data/fitness.csv") as f:
    # do stuff with f

SyntaxError: unexpected EOF while parsing (<ipython-input-9-3aedc7d27e02>, line 2)

# Reading from the file
 ## Read it all into a string

In [14]:
with open("data/loans.csv") as f:
    # pass
    #s = f.read()
    s = f.readline()
print(s)

Customer ID,Age,Gender,Years at address,Employment status,Country,Current debt,Postcode,Income,Own home,CCJs,Loan amount,Outcome



## Read one row

In [11]:
with open("data/loans.csv") as f:
    s = f.readline()
    #print(f.readline())
    print(s)

Customer ID,Age,Gender,Years at address,Employment status,Country,Current debt,Postcode,Income,Own home,CCJs,Loan amount,Outcome



## Read one row at a time until the file ends
- `readline` returns false if it fails to read a line
- So you could do this

In [12]:
lines = 0
with open("data/loans.csv") as f:
    s = f.readline()
    while s:
        lines += 1
        s = f.readline()

print("Read {} rows".format(lines))
#print(f"Read {lines} rows") # The Jupyter server in lecture theaters doesn't have this

Read 2001 rows


## But there is a nicer way

In [13]:
lines = 0
with open("data/loans.csv") as f:
    for l in f:
        lines += 1
print("Read {} rows".format(lines))
#print(f"Read {lines} rows")

Read 2001 rows


## If the file is huge and you want to peek with out loading it all
- First 5 lines

In [16]:
with open("data/loans.csv") as f:
    for i in range(5):
        print(f.readline())

Customer ID,Age,Gender,Years at address,Employment status,Country,Current debt,Postcode,Income,Own home,CCJs,Loan amount,Outcome

945780,19,F,2,Unemployed,UK,0,TA3 7SH,45500,Rent,1,13234,Paid

747989,66,F,13,Unemployed,UK,1080,WV6 8SU,18000,Own,0,5561,Paid

790344,48,F,4,Self Employed,UK,3690,BT15 5HG,47500,Rent,1,28288,Paid

597668,67,F,47,Self Employed,UK,6560,GU10 3NH,36000,Mortgage,0,30199,Paid



## Second column of first 100 lines, skipping the header

In [17]:
with open("data/loans.csv",'r') as f:
    f.readline() # Skip header
    col = [f.readline().split(",")[1] for i in range(100)]
    #col = [f.line.split(",")[1] for line in f]

print(col)

['19', '66', '48', '67', '70', '77', '28', '47', '25', '89', '78', '62', '68', '73', '72', '20', '43', '83', '65', '82', '38', '44', '76', '81', '19', '35', '69', '30', '68', '26', '88', '79', '61', '66', '19', '72', '67', '45', '85', '35', '85', '26', '56', '21', '85', '62', '73', '22', '85', '79', '81', '27', '78', '43', '65', '75', '34', '65', '87', '38', '32', '39', '58', '58', '82', '43', '62', '67', '39', '75', '22', '58', '69', '27', '77', '18', '18', '43', '33', '67', '69', '72', '29', '78', '44', '17', '60', '59', '40', '52', '76', '44', '27', '69', '52', '53', '68', '38', '18', '30']


# Directories
- Find out the current working directory with `os.getcwd()`

In [18]:
import os
print(os.getcwd())

/Users/johnjabbo/Desktop/Jupyter


# List a directory
- Using `os.walk`
- Lists each folder and each file in a given folder
- Then enters each folder and does the same in there and so on

In [19]:
root = os.getcwd()

dw = os.walk(root)
# d is a generator function, so we iterate:
# r=root, d=directories, f = files
for r, d, f in dw:
    display(r, d, f)


'/Users/johnjabbo/Desktop/Jupyter'

['.ipynb_checkpoints', 'Data', 'Jestin Book']

['6-Style and Scope.ipynb',
 '5-IfsandLoops.ipynb',
 'fitness.csv',
 '.DS_Store',
 'MovieData.json',
 'Lab 4 Pandas.ipynb',
 'Lab 2 Functions Solutions.ipynb',
 'BookCipherEncryption.ipynb',
 'BookCipherEncryption-Copy1.ipynb',
 '2-PythonObjects.ipynb',
 'loans.csv',
 'Lab 1 Intro Python.ipynb',
 'Lab 5 Visualisation.ipynb',
 'Lab 2 Functions-1.ipynb',
 'BookCipherAssignmentAttempt2.ipynb',
 '8-Pandas.ipynb',
 'Lab 3 NumPy.ipynb',
 'zips.json',
 '3-Dictionaries-1.ipynb',
 '7-NumPy.ipynb',
 '1-JupyterIntro.ipynb',
 '4-FunctionsGenerators.ipynb',
 '10-Files and Strings.ipynb',
 'BookCipherAssignment.ipynb',
 'BookCipherDecryption.ipynb']

'/Users/johnjabbo/Desktop/Jupyter/.ipynb_checkpoints'

[]

['BookCipherEncryption-checkpoint.ipynb',
 '2-PythonObjects-checkpoint.ipynb',
 'Lab 1 Intro Python-checkpoint.ipynb',
 '8-Pandas-checkpoint.ipynb',
 'BookCipherDecryption-checkpoint.ipynb',
 'BookCipherAssignmentAttempt2-checkpoint.ipynb',
 'BookCipherEncryption-Copy1-checkpoint.ipynb',
 '1-JupyterIntro-checkpoint.ipynb',
 'Untitled1-checkpoint.ipynb',
 'BookCipherAssignment-checkpoint.ipynb',
 'Untitled-checkpoint.ipynb',
 '10-Files and Strings-checkpoint.ipynb']

'/Users/johnjabbo/Desktop/Jupyter/Data'

[]

['AddedOutput.txt',
 'Customers.csv',
 'Products.csv',
 'fitness.csv',
 'Orders.csv',
 'loans.csv',
 'newfile.txt',
 'OrdersWithKey.csv',
 'sleep.csv',
 'sales.csv']

'/Users/johnjabbo/Desktop/Jupyter/Jestin Book'

['.ipynb_checkpoints']

['Encryption engine-1.py',
 'Encryption engine(core part).txt',
 'Decryption engine.py',
 'Decryption engine(core part).txt',
 'Encryption engine.py']

'/Users/johnjabbo/Desktop/Jupyter/Jestin Book/.ipynb_checkpoints'

[]

[]

## Just the `.txt` files in a given folder
- We only want to see the current folder, so we just get the first file list
- `_` means ignore this variable when unpacking the next item from the generator
- `next` gets the next item from the generator

In [None]:
dw = os.walk("data")
_, _, f = next(dw)
for fn in f:
    if fn[-4:] == '.txt':
        print(fn)
        
# or

dw = os.walk("data")
_, _, f = next(dw)
txts = [fn for fn in f if fn[-4:] == '.txt']
print(txts)

# Writing to a file
- Open with `w` or `a`
- Run it twice - no change in file contents as `w` starts an empty file
- Now change to open `a` for appen and re-run

In [1]:
#Create a new file and overwrite some text into it
with open("data/newfile.txt",'w') as f:
    f.write("Some text the overwrite\n")

with open("data/newfile.txt",'r') as f:
    a = f.read()
print(a)

Some text the overwrite



## What can we write?
- Strings only - convert everything else like you do when printing

In [None]:
a = 35
with open("data/newfile.txt",'w') as f:
    f.write("Writing {a}".format(a=a))
#    f.write(f"Writing {a}")
    
with open("data/newfile.txt",'r') as f:
    a = f.read()
print(a)

## Arrays and CSV
- We saw above how to convert an array to a separated string with `join` and `str`
- There is also a `csv` library
- Use this to specify how to write an array as a string to a file
- Specify separator, quote encloser and level of quotes
- - Try `QUOTE_NONNUMERIC` or `QUOTE_ALL` or `QUOTE_NONE`

In [None]:
import csv

line1 = [1, 2, 3, "a", "a,b"]
line2 = [4, 5, 6, "b", "c"]
with open('data/newfile.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile, delimiter=',',
                        quotechar='"', quoting=csv.QUOTE_NONE)
    writer.writerow(line1)
    writer.writerow(line2)
    
with open("data/newfile.csv",'r') as f:
    a = f.read()
print(a)


# Pandas and NumPy have Their Own
- Use the built in file methods where available, they are better and faster
## Pandas `read_csv` and `to_csv` or `to_json`
## NumPy `loadtxt` and `savetxt`

## Other exports from Pandas:

In [None]:
df = pd.DataFrame([[1, 2, 3],
                  [4, 5, 6],
                  [7, 8, 9]])
df.to_clipboard() # Paste somewhere now! Doesn't work on the lecture theater server
with open('data/table.html', 'w') as f:
    f.write(df.to_html())

In [None]:
   0  1  2
0  1  2  3
1  4  5  6
2  7  8  9

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=41af8bd7-a5ed-4334-a2fe-992dcc7ea742' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>