In this notebook we are going to see some fundamentals of Python and of some related packages (NumPy, scikit-learn, etc.).

# Arrays in numpy

In [21]:
import numpy as np
# 1-D array (vector) built from an explicit list of values
a = np.array([1.0, 2.0, 3.0])
print('a:', a, sep='\n')
print('a.shape =', a.shape, end='\n\n')

# 2-D array (matrix) built from a list of rows
b = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
print('b:', b, sep='\n')
print('b.shape =', b.shape, end='\n\n')

# matrix of zeros; the shape is passed as a (rows, columns) tuple
c = np.zeros(shape=(3, 2))
print('c:', c, sep='\n')
print('c.shape', c.shape, end='\n\n')

# matrix of size 1x3: a 2-D array with a single row, NOT the same as a 1-D array of 3 elements
c1 = np.zeros(shape=(1, 3))
c1[0] = [1, 2, 3]  # fill the only row; the values are stored as floats
print('c1:', c1, sep='\n')
print('c1.shape =', c1.shape, ' <- notice the difference with the first example (a) !!', end='\n\n')

# matrix of ones of the given shape
d = np.ones(shape=(2, 3))
print('d:', d, sep='\n', end='\n\n')

# identity matrix with the given number of rows/columns
e = np.eye(4)
print('e:', e, sep='\n', end='\n\n')

# random matrix of the given shape, values drawn uniformly from [0, 1)
f = np.random.random(size=(2, 4))
print('f:', f, sep='\n', end='\n\n')

a:
[1. 2. 3.]
a.shape = (3,)

b:
[[1 2 3]
 [4 5 6]]
b.shape = (2, 3)

c:
[[0. 0.]
 [0. 0.]
 [0. 0.]]
c.shape (3, 2)

c1:
[[1. 2. 3.]]
c1.shape = (1, 3)  <- notice the difference with the first example (a) !!

d:
[[1. 1. 1.]
 [1. 1. 1.]]

e:
[[1. 0. 0. 0.]
 [0. 1. 0. 0.]
 [0. 0. 1. 0.]
 [0. 0. 0. 1.]]

f:
[[0.2978403  0.86118454 0.97853982 0.90239901]
 [0.61399409 0.96707241 0.566517   0.3994777 ]]



# Array indexing in numpy

In [22]:
# 2x4 integer matrix used to demonstrate indexing
e = np.array([
    [1, 5, 7, 9],
    [2, 6, 8, 10],
])
# a bare ':' on an axis selects everything along it, so e[:, :] is the whole matrix
print(e[:, :])

[[ 1  5  7  9]
 [ 2  6  8 10]]


# Reading from CSV file: built-in functions method [1/2]

In [9]:
file_name = "data/marks.csv"
max_lines = 3  # number of lines to display from the top of the file
infile = open(file_name, 'r')
try:
    # enumerate yields (0, first line), (1, second line), ...: line_c is the line counter
    for line_c, line in enumerate(infile):
        if line_c >= max_lines:
            break  # enough lines shown: stop instead of scanning the rest of the file
        line = line.strip()  # strip removes leading/trailing whitespace, newline included
        print("Line:\n" + line)  # the newline at the end of the line was removed by strip
        v = line.split(',')  # split breaks the string into chunks delimited by the argument
        print(type(v), v)  # either print v like this...
        print("List: " + str(v))  # ...or convert it to a string and use + to concatenate
        print(type(v[0]))  # v is a list of strings
        print("Elements in list:")
        for field in v:
            print(field.strip(), end=' ')  # end=' ' replaces the default newline with a space
        print('\n')
finally:
    infile.close()  # remember to close the file; 'finally' runs even if reading failed

Line:
1,23,21,20
<class 'list'> ['1', '23', '21', '20']
List: ['1', '23', '21', '20']
<class 'str'>
Elements in list:
1 23 21 20 

Line:
2,28,30,29
<class 'list'> ['2', '28', '30', '29']
List: ['2', '28', '30', '29']
<class 'str'>
Elements in list:
2 28 30 29 

Line:
3,25,20,29
<class 'list'> ['3', '25', '20', '29']
List: ['3', '25', '20', '29']
<class 'str'>
Elements in list:
3 25 20 29 



### Automatic file.close() – 'with' environment

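Sometimes one does not need the flexibility allowed by `f = open(filename)` ... `f.close()`.\
In those cases we can use the **with** statement: the file is closed automatically as soon as the indented block ends, even if an error occurs inside it.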

In [None]:
file_name = "data/marks.csv"
max_lines = 5  # number of lines to display, i.e. line_c = 0, 1, 2, 3, 4
with open(file_name, 'r') as infile:  # same as infile = open(file_name, 'r')
    # enumerate yields (0, first line), (1, second line), ...: line_c is the line counter
    for line_c, line in enumerate(infile):
        if line_c >= max_lines:
            break  # enough lines shown: stop instead of scanning the rest of the file
        line = line.strip()  # strip removes leading/trailing whitespace, newline included
        print("Line:\n" + line)  # the newline at the end of the line was removed by strip
        v = line.split(',')  # split breaks the string into chunks delimited by the argument
        print("List: " + str(v))
        print("Elements in list:")
        for field in v:
            print(field.strip(), end=' ')  # end=' ' replaces the default newline with a space
        print('\n')
# infile.close() is executed automatically when we exit the indented block

# Writing to file

In [None]:
infile_name = "data/marks.csv"
outfile_name = "tmp.txt"
max_lines = 5  # number of input lines to copy, i.e. line_c = 0, 1, 2, 3, 4

# Both files are opened in a single 'with' so that each one is closed automatically,
# even if an error occurs while reading or writing.
with open(infile_name, 'r') as infile, open(outfile_name, 'w') as outfile:
    for line_c, line in enumerate(infile):
        if line_c >= max_lines:
            break  # enough lines copied: stop instead of scanning the rest of the file
        # line is not stripped here, so it still carries its own newline:
        # together with the extra "\n" this leaves a blank line in the output
        outfile.write("Line:\n" + line + "\n")
        v = line.split(',')
        outfile.write("List: " + str(v) + "\n")
        outfile.write("Elements in list:\n")
        for field in v:
            outfile.write(field.strip() + "\n")  # one element per line
    # write() only accepts strings, so numbers must be converted with str() first
    outfile.write(str(10.))

# Reading from CSV file: csv library method [2/2]

In [15]:
import csv

filename = "data/marks.csv"

# newline='' lets the csv module handle line endings itself;
# the 'with' block closes the file once we are done reading from it
with open(filename, newline='') as csvfile:
    lines = csv.reader(csvfile, delimiter=',')
    print('type(lines) = ', type(lines))
    #print('type(lines[0]) = ', type(lines[0])) #does NOT work: csv.reader object is not subscriptable
    # Instead, you need to access the rows in a loop (each row is a list of strings)
    for line in lines:
        print(line)

type(lines) =  <class '_csv.reader'>


TypeError: '_csv.reader' object is not subscriptable

In [None]:
import csv

filename = "data/marks.csv"

# Read every row while the file is open; the 'with' block closes it afterwards.
# Each row is converted from a list of strings to a list of floats.
# Blank lines are returned by csv.reader as empty lists: they are skipped so that
# they are not counted as students and do not make the rows uneven in length.
with open(filename, newline='') as csvfile:
    lines = csv.reader(csvfile, delimiter=',')
    dataset = [[float(x) for x in row] for row in lines if row]

print(dataset)
print('Number of students:', len(dataset), end='\n\n')
# you can convert lists to numpy for automatic print formatting:
print(np.array(dataset))