# File handling

### Input/Output

In [1]:
# input() prints the prompt, waits for one line on standard input
# and hands that line back as a string
answer = input("enter your name:")
print(answer)

enter your name:Max
Max


### Simple file operations

##### writing to files

In [2]:
# open() creates a file object; mode 'w' creates the file or, if it already
# exists, overwrites it (attention: the old content is lost).
# Important file-object methods: read, write, readlines, writelines
# (dir(f) lists all of them).
# The with-block closes the file automatically, even if write() raises,
# so no explicit f.close() is needed.
with open('new_file.txt', 'w') as f:
    f.write("Hallo Welt!")

In [3]:
# writelines: write a whole list of strings with a single call.
# Every entry carries its own '\n' here, which is what puts each number
# on its own line in the file.
lines = ["Number: " + str(i) + '\n' for i in range(12)]
print(lines)


# mode 'a' opens the file for appending instead of overwriting it;
# the with-block closes the file again even if writing fails
with open('new_file.txt', 'a') as f:
    f.writelines(lines)

['Number: 0\n', 'Number: 1\n', 'Number: 2\n', 'Number: 3\n', 'Number: 4\n', 'Number: 5\n', 'Number: 6\n', 'Number: 7\n', 'Number: 8\n', 'Number: 9\n', 'Number: 10\n', 'Number: 11\n']


##### reading from files

In [4]:
# Recommended way to work with files in Python: the with-statement
# closes the file as soon as the block is left
with open('new_file.txt', mode='r') as f:  # 'r' = open for reading
    content = f.read()  # read() returns the complete file as one string

print(content)


Hallo Welt!Number: 0
Number: 1
Number: 2
Number: 3
Number: 4
Number: 5
Number: 6
Number: 7
Number: 8
Number: 9
Number: 10
Number: 11



In [5]:
# readlines() returns the file content as a list of strings, one per line,
# each still ending with its '\n'
with open('new_file.txt', 'r') as f:  # open file for reading
    lines = f.readlines()
# no f.close() here: leaving the with-block has already closed the file
print(lines)

['Hallo Welt!Number: 0\n', 'Number: 1\n', 'Number: 2\n', 'Number: 3\n', 'Number: 4\n', 'Number: 5\n', 'Number: 6\n', 'Number: 7\n', 'Number: 8\n', 'Number: 9\n', 'Number: 10\n', 'Number: 11\n']


### Parsing data 

In [6]:
# Parse the 'name: value' records of data.txt into a dict {name: float(value)}
with open('data.txt', 'r') as f:
    lines = f.readlines()

data = {}

# iterate over all lines in the file
for line in lines:
    line = line.strip()  # strip removes leading and trailing whitespace (incl. '\n')
    # skip empty lines (e.g. a blank line at the end of the file) and comments;
    # without this check a blank line would crash the unpacking below
    if not line or line.startswith('#'):
        continue
    left, right = line.split(':')  # split cuts the string at each occurrence of the separator
    data[left.strip()] = float(right)  # float() tolerates the spaces around the number
print(data)


{'HAA1': 76681.4, 'ICL2': 64976.6, 'GPD1': 42854.2, 'HAL1': 33001.4, 'REC8': 77178.6, 'SUT2': 30272.2}


### Object serialization using pickle

In [7]:
# the pickle module is used to serialize python objects (convert them to byte strings)
import pickle

##### how pickle converts objects to byte strings

In [8]:
# a small example dictionary: integer keys mapped to colour names
d = {
    1: 'green',
    2: 'blue',
    3: 'red',
}
# dumps() returns the serialized object as a bytes value instead of writing it to a file
pickle.dumps(d)

b'\x80\x03}q\x00(K\x01X\x05\x00\x00\x00greenq\x01K\x02X\x04\x00\x00\x00blueq\x02K\x03X\x03\x00\x00\x00redq\x03u.'

##### how to save objects to a file

In [9]:
# 'wb' opens the file for writing in binary mode, which is required
# because pickle produces bytes rather than text
with open('save.p', mode='wb') as f:
    pickle.dump(d, f)  # serialize d directly into the open file

In [10]:
# NOTE(review): these are the same statements as in the previous cell;
# running them again just overwrites save.p ('wb' truncates the file first)
with open('save.p', mode='wb') as f:
    pickle.dump(d, f)

In [11]:
# 'rb' opens the file for reading in binary mode; load() rebuilds the object
# that was stored in it.
# Only unpickle files you trust: loading a pickle can execute arbitrary code.
with open('save.p', mode='rb') as f:
    loaded_data = pickle.load(f)
print(loaded_data)

{1: 'green', 2: 'blue', 3: 'red'}


### Organizing files in folders

In [12]:
# Preparation for creating a folder for the data files:
# the os module provides the directory and path helpers
import os

# os.getcwd() returns the current working directory
work_path = os.getcwd()
print(work_path)

/home/schelker/Projects/Repository/fachkurs_master_2016/05_dataio_matplotlib


In [13]:
# define path for data files
# (the trailing '/' is kept so that data_path has exactly the same value as before)
data_path = os.path.join(work_path, 'data/')

# create the folder if it is missing; exist_ok=True turns the call into a no-op
# when the folder already exists, which avoids the gap between a separate
# os.path.exists() check and os.mkdir()
os.makedirs(data_path, exist_ok=True)

### Comma separated value (CSV) files

##### reading tabular data using pandas

In [14]:
# Look at the raw text of the CSV file first, read the plain-Python way.
# The result of readlines() is stored in a variable; inside a with-block a
# bare expression is neither kept nor displayed by the notebook.
with open('real_estate.csv', 'r') as f:
    raw_lines = f.readlines()

# show only the first few lines instead of the whole file
raw_lines[:5]

In [15]:
import pandas as pd

# read_csv parses the whole file into a DataFrame with a single call
df = pd.read_csv('real_estate.csv')
print(df)
# df.values.tolist() would return the rows as a plain list of lists

                              street             city    zip state  beds  \
0                       3526 HIGH ST       SACRAMENTO  95838    CA     2   
1                        51 OMAHA CT       SACRAMENTO  95823    CA     3   
2                     2796 BRANCH ST       SACRAMENTO  95815    CA     2   
3                   2805 JANETTE WAY       SACRAMENTO  95815    CA     2   
4                    6001 MCMAHON DR       SACRAMENTO  95824    CA     2   
5                 5828 PEPPERMILL CT       SACRAMENTO  95841    CA     3   
6                6048 OGDEN NASH WAY       SACRAMENTO  95842    CA     3   
7                      2561 19TH AVE       SACRAMENTO  95820    CA     3   
8    11150 TRINITY RIVER DR Unit 114   RANCHO CORDOVA  95670    CA     2   
9                       7325 10TH ST        RIO LINDA  95673    CA     3   
10                  645 MORRISON AVE       SACRAMENTO  95838    CA     3   
11                     4085 FAWN CIR       SACRAMENTO  95823    CA     3   
12          

### JavaScript Object Notation (JSON)

*JSON (/ˈdʒeɪsən/ JAY-sən), or JavaScript Object Notation, is an open standard format that uses human-readable text to transmit data objects consisting of attribute–value pairs. It is used primarily to transmit data between a server and a web application, as an alternative to XML.*

*Although originally derived from the JavaScript scripting language, JSON is a language-independent data format. Code for parsing and generating JSON data is readily available in many programming languages.*

https://en.wikipedia.org/wiki/JSON

In [16]:
# Show the raw JSON file. Plain Python is used instead of the `cat` shell
# alias so that the cell does not depend on IPython's automagic or on a
# POSIX shell being available.
with open('employees.json', 'r') as f:
    print(f.read(), end='')  # end='' prints the file content unchanged

{"employees":[
    {"firstName":"John", "lastName":"Doe"},
    {"firstName":"Anna", "lastName":"Smith"},
    {"firstName":"Peter", "lastName":"Jones"}
]}


In [17]:
import json

# json.load() parses the open file into Python objects; the with-block closes
# the file again, which a bare json.load(open(...)) never does
with open('employees.json') as f:
    d = json.load(f)

# d is a dict whose 'employees' entry is a list of dicts - show the second employee
d['employees'][1]

{'firstName': 'Anna', 'lastName': 'Smith'}