# Python File Input and Output

## Getting a List of Files

In [1]:
mypath = "./data/"

In [3]:
import os
files = os.listdir(mypath)
print(files)

['CAC40PricesFromYahoo.pkl', 'data.pkl', 'DAX30PricesFromYahoo.pkl', 'DowJonesPricesFromGoogle.pkl', 'DowJonesPricesFromYahoo.pkl', 'EquityIndexPricesFromYahoo.pkl', 'FTSE100PricesFromYahoo.pkl', 'FTSEMIBPricesFromYahoo.pkl', 'fxrates', 'GermanDataClean.csv', 'GermanDataClean_OUT.csv', 'giltBondPrices.txt', 'IndexComponentTickers.xls', 'timeSeriesData.pkl']


Sometimes we like to be able to use a pattern matching string

In [4]:
import glob
files = glob.glob("./data/*.csv")
print(files)

['./data\\GermanDataClean.csv', './data\\GermanDataClean_OUT.csv']


## Reading from a File

In [5]:
filename = "./data/GermanDataClean.csv"

In [6]:
f = open(filename,"r") 
lines = f.readlines()
f.close() 

In [7]:
len(lines)

1001

In [8]:
lines[0:10]

['Default,Account Balance,Duration of Credit (month),Payment Status of Previous Credit,Purpose,Credit Amount,Value Savings/Stocks,Length of current employment,Instalment per cent,Sex & Marital Status,Guarantors,Duration in Current address,Most valuable available asset,Age (years),Concurrent Credits,Type of apartment,No of Credits at this Bank,Occupation,No of dependents,Telephone,Foreign Worker\n',
 '1,1,18,4,2,1049,1,2,4,2,1,4,2,21,3,1,1,3,1,1,1\n',
 '1,1,9,4,0,2799,1,3,2,3,1,2,1,36,3,1,2,3,2,1,1\n',
 '1,2,12,2,9,841,2,4,2,2,1,4,1,23,3,1,1,2,1,1,1\n',
 '1,1,12,4,0,2122,1,3,3,3,1,2,1,39,3,1,2,2,2,1,2\n',
 '1,1,12,4,0,2171,1,3,4,3,1,4,2,38,1,2,2,2,1,1,2\n',
 '1,1,10,4,0,2241,1,2,1,3,1,3,1,48,3,1,2,2,2,1,2\n',
 '1,1,8,4,0,3398,1,4,1,3,1,4,1,39,3,2,2,2,1,1,2\n',
 '1,1,6,4,0,1361,1,2,2,3,1,4,1,40,3,2,1,2,2,1,2\n',
 '1,4,18,4,3,1098,1,1,4,2,1,4,3,65,3,2,2,1,1,1,1\n']

A more elegant way of opening the file is using the with command

In [32]:
with open(filename,"r") as f: 
    lines = f.readlines()

FileNotFoundError: [Errno 2] No such file or directory: './data/GermanDataClean_OUT.csv'

## Parsing Text using Python

We can then parse this using Python Strings

In [33]:
data = []
for line in lines:
    fields = line.split(",")
    data.append(fields)

In [34]:
print("Number of rows:",len(data))
print("Number of columns:",len(data[0]))

Number of rows: 1001
Number of columns: 21


In [35]:
print(data[0])

['Default', 'Account Balance', 'Duration of Credit (month)', 'Payment Status of Previous Credit', 'Purpose', 'Credit Amount', 'Value Savings/Stocks', 'Length of current employment', 'Instalment per cent', 'Sex & Marital Status', 'Guarantors', 'Duration in Current address', 'Most valuable available asset', 'Age (years)', 'Concurrent Credits', 'Type of apartment', 'No of Credits at this Bank', 'Occupation', 'No of dependents', 'Telephone', 'Foreign Worker\n']


In [36]:
print(data[1])

['1', '1', '18', '4', '2', '1049', '1', '2', '4', '2', '1', '4', '2', '21', '3', '1', '1', '3', '1', '1', '1\n']


We may decide to strip out the return character in the final field before we split the fields

In [37]:
data = []
for line in lines:
    line = line.replace("\n","")
    fields = line.split(",")
    data.append(fields)

In [38]:
print(data[0])

['Default', 'Account Balance', 'Duration of Credit (month)', 'Payment Status of Previous Credit', 'Purpose', 'Credit Amount', 'Value Savings/Stocks', 'Length of current employment', 'Instalment per cent', 'Sex & Marital Status', 'Guarantors', 'Duration in Current address', 'Most valuable available asset', 'Age (years)', 'Concurrent Credits', 'Type of apartment', 'No of Credits at this Bank', 'Occupation', 'No of dependents', 'Telephone', 'Foreign Worker']


In [39]:
print(data[1])

['1', '1', '18', '4', '2', '1049', '1', '2', '4', '2', '1', '4', '2', '21', '3', '1', '1', '3', '1', '1', '1']


These are all strings - we want values

## Outputting the File

In [40]:
filename = "./data/GermanDataClean_OUT.csv"

with open(filename,"w") as f: 
    for row in data:
        ncols = len(row)
        for field in row[0:ncols-1]:
            f.write(str(field) + ",")
        for field in row[ncols-1:ncols]:
            f.write(str(field) + "\n")            

## String Functions

In [41]:
s = "EDHEC is a Business School"

In [42]:
len(s)

26

In [43]:
s[4]

'C'

In [44]:
s[10:15]

' Busi'

In [45]:
s.count('s')

4

In [46]:
s.find('B')

11

In [47]:
s.find('X')

-1

In [48]:
s[11]

'B'

In [49]:
s.replace("a","the")

'EDHEC is the Business School'

In [50]:
s.lower()

'edhec is a business school'

In [51]:
s.upper()

'EDHEC IS A BUSINESS SCHOOL'

In [52]:
q = "in France"

In [53]:
r = s + " " + q

In [54]:
print(r)

EDHEC is a Business School in France
