# imports

In [128]:
import numpy as np
import pandas as pd
from time import time, sleep
import re
from collections import Counter

# shorthand if-else

In [53]:
# Sample age used by the if/else examples in the following cells
age = 15

In [54]:
# Traditional if-else: a multi-line statement that prints one label per branch
if age<18:
    print ('teenager')
else:
    print ('adult')

teenager


In [55]:
# Shorthand if-else: a conditional expression picks the value passed to print
print('teenager' if age<18 else 'adult')

teenager


In [56]:
# Traditional if-elif-else chain: conditions are tested top to bottom and the first match wins
if age < 13:
    print ('kid')
elif age < 18:
    print ('teenager')
else:
    print ('adult')

teenager


In [57]:
# Shorthand if-elif-else: chained conditional expressions, tested left to right
print('kid' if age<13 else 'teenager' if age<18 else 'adult')

teenager


# enumerate

When looping over an iterable, we often also need to keep a count of the iterations. Python makes this easy with the built-in function `enumerate()`.

In [13]:
# Sample list (with repeated letters) iterated over by the following cells
letters = ['a','b','c','d','e','a','b','c','d','e']

In [14]:
# Using a manual counter inside the loop
i = 0  # running index, incremented by hand on every iteration
for letter in letters:
    print (i, letter)
    i+=1

0 a
1 b
2 c
3 d
4 e
5 a
6 b
7 c
8 d
9 e


In [15]:
# Using range() and len(): loop over the indices and look each item up by position
for i in range(len(letters)):
    print (i, letters[i])

0 a
1 b
2 c
3 d
4 e
5 a
6 b
7 c
8 d
9 e


In [11]:
# enumerate() yields (index, item) pairs, so no manual counter is needed.
# Note: this cell rebinds `letters` to a shorter, five-item list.
letters = ['a', 'b', 'c', 'd', 'e']
for i, letter in enumerate(letters):
    print(i, letter)

0 a
1 b
2 c
3 d
4 e


# zip

Used for parallel iteration

In [234]:
# zip() walks several iterables in lockstep and stops as soon as the
# shortest one (here the 10-item ranges `a` and `b`) is exhausted.
a = range(10)
b = range(10, 30, 2)
c = range(100, 10000, 20)
for i, j, k in zip(a, b, c):
    print(i, j, k)

0 10 100
1 12 120
2 14 140
3 16 160
4 18 180
5 20 200
6 22 220
7 24 240
8 26 260
9 28 280


In [239]:
# Pair each count with a letter; int * str repeats the string that many times.
# `a` has 9 items and `b` has 10, so zip() stops after 'i' and 'j' is never used.
a = range(1, 10)
b = list('abcdefghij')
for i, j in zip(a, b):
    print(i, j, i * j)

1 a a
2 b bb
3 c ccc
4 d dddd
5 e eeeee
6 f ffffff
7 g ggggggg
8 h hhhhhhhh
9 i iiiiiiiii


# Comprehension
## list comprehension

Simple for loops can be written using list comprehension.

    - Let's see an example that extracts only the even numbers!

In [58]:
# Traditional for loop: collect the even numbers below 10
result = []
for i in range(10):
    if i % 2 == 0:
        result.append(i)
print(result)

[0, 2, 4, 6, 8]


In [61]:
# List comprehension: the same even-number filter written as a single expression
print ([i for i in range(10) if i%2==0])

[0, 2, 4, 6, 8]


Another example
    - Square of first 10 numbers (Starting from 0)

In [62]:
# List comprehension without a filter: square every number from 0 to 9
print ([i**2 for i in range(10)])

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]


## Dict comprehension

A dict comprehension lets us condense the several lines otherwise needed to build a dictionary into one line. It is similar to a list comprehension, but it uses curly braces `{}` with `key: value` pairs instead of `[]`.

Example
    - Let's see how to convert the list items into dictionary keys and convert item into lower case string

In [68]:
# Upper-case fruit names that the following cells turn into dictionary keys
l_fruits = ['APPLE', 'MANGO', 'ORANGE']

In [74]:
# Traditional loop: use each lower-cased fruit name as a key with the value 1
d_fruits = {}
for fruit in l_fruits:
    d_fruits[fruit.lower()] = 1
print (d_fruits)

{'apple': 1, 'mango': 1, 'orange': 1}


In [75]:
# Dict comprehension: builds the same lower-cased-name -> 1 mapping in one expression
{fruit.lower():1 for fruit in l_fruits}

{'apple': 1, 'mango': 1, 'orange': 1}

# String concatenation

In [None]:
# Words to be joined into a sentence by the following cells
words = ["hello", "world", "how", "are", "you"]

In [77]:
# Traditional string concatenation using a for loop
words_joined = ''
for word in words:
    # a space is appended after every word, so the result ends with a trailing space
    words_joined = words_joined + word + ' '
print (words_joined)

hello world how are you 


In [78]:
# str.join() puts the separator only between the items, so there is no trailing space
print (' '.join(words))

hello world how are you


In [80]:
# Any string can serve as the separator
print (' | '.join(words))

hello | world | how | are | you


# lambda functions

`lambda` is helpful to write single line functions without naming a function

In [84]:
# Traditional named function defined with def
def add3_v1(a):
    """Return `a` increased by 3."""
    return a + 3

print(add3_v1(4))

7


The code below creates a function object with `lambda`; we can assign it to any variable name and call it through that name.

In [86]:
# The same function written as a lambda expression and bound to a name
add3_v2 = lambda a: a + 3
print(add3_v2(4))

7


Immediately invoked lambda
    - We can also define a lambda and call it right away, without assigning it to a name

In [91]:
# Define the lambda and call it immediately with the argument 4
print ((lambda a: a+3)(4))

7


Another example
    - Lambda function with 3 input arguments that sums the input values

In [93]:
# A lambda can take several arguments; this one adds up its three inputs
x = lambda a, b, c: a + b + c
print(x(1, 2, 3))

6


# regex

A RegEx, or Regular Expression, is a sequence of characters that forms a search pattern.

RegEx can be used to check if a string contains the specified search pattern.

Example 1
    - The code below checks whether the `txt` starts with `The`, has `rain` in it, and ends with `Spain`.

In [162]:
# Sample sentence checked by the following cells
txt = "The rain in Spain"

In [163]:
# Plain string methods: check the prefix, a substring and the suffix separately
if txt.startswith("The") and "rain" in txt and txt.endswith("Spain"):
    print (txt)

- `^The` 
    - means the text should start with `The`
- `.*` 
    - means the text may have zero or more characters here (any character except a newline)
- `(rain)`
    - means the text should have the word `rain` somewhere after the word `The` 
- `Spain$`
    - means the text should end with `Spain`


In [165]:
# Using re.search with the regex metacharacters ^ . * $
if re.search("^The.*(rain).*Spain$", txt):
    print (txt)

  Example 2
      - Extract only the dollar values from the sentence. 
            - You would have to spend a lot of time and write long code to implement this using traditional string-based search
            - With regex, we can implement it in one line

In [166]:
# Sample sentence containing a single dollar amount
txt2 = 'We just received $10.20 for the cookies'

- `\$` - Locates a real dollar sign
- `[0-9.]` - A digit or dot
- `+` - One or more repetitions of the preceding `[0-9.]`

In [168]:
# Raw string: `\$` is not a valid Python string escape, so a plain literal
# triggers an invalid-escape warning; r'...' passes the backslash straight
# to the regex engine, where `\$` matches a literal dollar sign.
re.findall(r'\$[0-9.]+',txt2)

['$10.20']

In [170]:
# findall returns every non-overlapping match, so both dollar amounts are found.
# Raw string: `\$` is not a valid Python string escape, so a plain literal
# triggers an invalid-escape warning; r'...' avoids it without changing the pattern.
txt3 = 'We just received $10.20 for the cookies. I have to give a change of $2.5 back'
re.findall(r'\$[0-9.]+',txt3)

['$10.20', '$2.5']

# Counter

In [228]:
# Read the whole .txt file into one string. The `with` block closes the file
# handle as soon as the read finishes instead of leaving it open.
with open('data/mbox-short.txt', 'r') as mbox_file:
    mbox_data = mbox_file.read()
# Preview only the first 500 characters
print (mbox_data[:500])

From stephen.marquard@uct.ac.za Sat Jan  5 09:14:16 2008
Return-Path: <postmaster@collab.sakaiproject.org>
Received: from murder (mail.umich.edu [141.211.14.90])
	 by frankenstein.mail.umich.edu (Cyrus v2.3.8) with LMTPA;
	 Sat, 05 Jan 2008 09:14:16 -0500
X-Sieve: CMU Sieve 2.3
Received: from murder ([unix socket])
	 by mail.umich.edu (Cyrus v2.2.12) with LMTPA;
	 Sat, 05 Jan 2008 09:14:16 -0500
Received: from holes.mr.itd.umich.edu (holes.mr.itd.umich.edu [141.211.14.79])
	by flawless.mail.umic


In [231]:
# Turn the text into a list of tokens by splitting on single space characters
# NOTE(review): split(' ') splits only on single spaces, so the result contains
# empty strings (from consecutive spaces) and tokens with embedded '\n' / '\t';
# a bare .split() would split on any run of whitespace — confirm which is intended.
mbox_data_split = mbox_data.strip().split(' ')
print (mbox_data_split[:20])

['From', 'stephen.marquard@uct.ac.za', 'Sat', 'Jan', '', '5', '09:14:16', '2008\nReturn-Path:', '<postmaster@collab.sakaiproject.org>\nReceived:', 'from', 'murder', '(mail.umich.edu', '[141.211.14.90])\n\t', 'by', 'frankenstein.mail.umich.edu', '(Cyrus', 'v2.3.8)', 'with', 'LMTPA;\n\t', 'Sat,']


In [226]:
# Count how often each distinct token occurs in the text file
freq = Counter(mbox_data_split)
# Entries are visited in first-seen order, not sorted by count
for i, (word, count) in enumerate(freq.items()):
    print (word, "<=>", count)
    # Stop after the entry with index 10, i.e. once 11 entries have been printed
    if i==10: 
        break

From <=> 1
stephen.marquard@uct.ac.za <=> 4
Sat <=> 2
Jan <=> 352
 <=> 902
5 <=> 10
09:14:16 <=> 4
2008
Return-Path: <=> 27
<postmaster@collab.sakaiproject.org>
Received: <=> 27
from <=> 218
murder <=> 54


# Power of numpy

## Square of a vector

In [126]:
# Baseline: square 100,000 integers one at a time in a plain Python loop
start_time = time()

a = list(range(100000))
b = []
for i in a:
    b.append(i ** 2)

end_time = time()
print('Time elapsed:', end_time - start_time)

Time elapsed: 0.2508561611175537


Now, let's see how much time it takes using numpy (vectorization)

In [127]:
# Vectorised version: numpy squares the whole array in a single call
start_time = time()

a = np.arange(100000)
b = np.square(a)

end_time = time()
print('Time elapsed:', end_time - start_time)

Time elapsed: 0.007993936538696289


## Dot product of 2 vectors

$a = [a_{1}, a_{2}, a_{3}, ... , a_{n}]$ & $b = [b_{1}, b_{2}, b_{3}, ... , b_{n}]$

$c = a \cdot b = a_{1} b_{1} + a_{2} b_{2} + \dots + a_{n} b_{n}$

In [108]:
# Two vectors of one million samples each, drawn with np.random.randn
# NOTE(review): no random seed is set, so the values (and any dot product
# computed from them) differ on every run
a = np.random.randn(1000000)
b = np.random.randn(1000000)

In [118]:
# Dot product with a traditional for loop: multiply element pairs and accumulate the sum
start_time = time()

c = 0  # running sum of the pairwise products
for i,j in zip(a,b):
    c = c + i*j
print (c)

end_time = time()
print ('Time elapsed:', end_time - start_time)

-423.1358127918502
Time elapsed: 4.40748143196106


In [119]:
# Dot product using numpy: a single vectorised np.dot call replaces the loop
start_time = time()

print (np.dot(a,b))

end_time = time()
print ('Time elapsed:', end_time - start_time)

-423.1358127918902
Time elapsed: 0.006995677947998047


# Power of Pandas

In [185]:
# 5000 x 10 array of random integers; randint's `high` bound is exclusive,
# so the values lie in [-9999, 9998]
a = np.random.randint(low=-9999, high=9999, size=(5000,10), dtype='int')
print ("Array shape:", a.shape)

Array shape: (5000, 10)


In [189]:
# Slice out rows 0-4 (all columns) to preview the array
print ('First 5 row values in array')
print (a[0:5,:])

First 5 row values in array
[[ 8665  3439  9676 -2913 -5027 -1009  7213   577  3733  2788]
 [ 5087  5886 -2491  -401  8569  8710  5347 -4611 -9134   409]
 [ 8354 -7601  8482 -9339  7898 -1866  3875  -255   305 -1166]
 [-6570  5010   951  9995 -4884 -5431 -7937  1772  9745  8497]
 [ 9011 -2853  8334  4679  5893   558 -4850  -877  3232 -6574]]


In [182]:
# Write the entire array to a CSV file in one line
# NOTE(review): savetxt's default format is '%.18e', so the integers are stored
# as floats (which is why they read back as floats in the DataFrame);
# passing fmt='%d' would keep them as integers — confirm intent.
np.savetxt('data/data1.csv', a, delimiter=",")

In [184]:
# Read the entire CSV in one line; header=None treats the first row as data, not column names
df = pd.read_csv('data/data1.csv', header=None)
# Bare last expression: the notebook displays the (rows, columns) tuple
df.shape

(5000, 10)

In [190]:
# Show the first five rows of the DataFrame as plain text
print(df.head(n=5))

        0       1       2       3       4       5       6       7       8  \
0 -7854.0  7875.0  -632.0 -8854.0 -5691.0 -2334.0 -1310.0 -7933.0  3448.0   
1 -9484.0  3953.0  1400.0  4274.0 -7045.0 -1067.0 -8537.0  4745.0  4095.0   
2  5832.0  2921.0 -1770.0  -355.0  8607.0 -5338.0  1578.0 -1806.0 -2519.0   
3  5207.0  1146.0  5068.0 -4338.0 -5966.0 -4945.0  6653.0  1565.0  5226.0   
4   144.0 -3751.0 -2496.0 -1437.0 -4437.0 -2510.0 -4167.0 -6520.0   508.0   

        9  
0 -6106.0  
1  5853.0  
2  4004.0  
3 -6685.0  
4  9246.0  


# Python and JSON

https://www.w3schools.com/python/python_json.asp