# Numpy: python package for numerical calculations
## numpy arrays

In [1]:
import numpy as np
#creating array object
arr = np.array([1,3,4,5,6])
#printing array object
arr

array([1, 3, 4, 5, 6])

In [2]:
# shape of array
arr.shape
# array contains only one row

(5,)

In [3]:
# printing type of array
arr.dtype

dtype('int32')

In [4]:
# trying to create array with different data types
arr = np.array([1,'st','er',3])
arr.dtype
# all the data types get converted into strings

dtype('<U11')

In [6]:
# adding arrays
np.sum(arr)

TypeError: cannot perform reduce with flexible type

### Creating arrays

In [7]:
# creating arrays
arr = np.array([[1,2,3],[2,4,6],[8,8,8]])
# getting shape of an array with 3 rows and 3 columns
arr.shape

(3, 3)

In [8]:
# stacking up arrays in rows and columns
arr

array([[1, 2, 3],
       [2, 4, 6],
       [8, 8, 8]])

In [10]:
# creating an array containing zeros only
# 2 rows and 4 columns
arr = np.zeros((2,4))
arr

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [12]:
# creating an array of ones only
# 2 rows and 4 columns
arr = np.ones((2,4))
arr

array([[1., 1., 1., 1.],
       [1., 1., 1., 1.]])

In [14]:
# creating an identity matrix
# 3 rows and 3 columns
arr = np.identity(3)
arr

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [16]:
# creating arrays with random numbers
arr = np.random.randn(3,4)
arr

array([[ 1.12814852, -0.59421153, -0.66676505,  0.13539853],
       [-1.30073175, -0.14226006, -0.76877445,  0.81349057],
       [-0.80900293, -1.40630216,  1.93502158, -1.1121585 ]])

In [20]:
# command used to read data from text file to numpy array is using genfromtext command
from io import BytesIO
b = BytesIO(b"2,23,33\n32,42,63.4\n35,77,12")
arr = np.genfromtxt(b, delimiter=",")
arr

array([[ 2. , 23. , 33. ],
       [32. , 42. , 63.4],
       [35. , 77. , 12. ]])

###  Methods of Accessing array elements 
#### Simple indexing

In [21]:
# accessing second row of the array as created in the above example
arr[1]

array([32. , 42. , 63.4])

In [23]:
# for arrays having more than two dimensions
arr = np.arange(12).reshape(2,2,3)
arr

array([[[ 0,  1,  2],
        [ 3,  4,  5]],

       [[ 6,  7,  8],
        [ 9, 10, 11]]])

In [24]:
# getting one dimension less than the original array
arr[0]

array([[0, 1, 2],
       [3, 4, 5]])

In [26]:
# use of slicing to access the elements
# accessing collection of element rather than all elements in an array
# example for one dimentional array
arr = np.arange(10)
arr[5:]


array([5, 6, 7, 8, 9])

In [27]:
arr[5:8]

array([5, 6, 7])

In [28]:
arr[:-5]

array([0, 1, 2, 3, 4])

In [32]:
# if number of dimensions of objects supplied is less than the dimensions of array than : is used
arr = np.arange(12).reshape(2,2,3)
arr

array([[[ 0,  1,  2],
        [ 3,  4,  5]],

       [[ 6,  7,  8],
        [ 9, 10, 11]]])

In [33]:
arr[1:2]

array([[[ 6,  7,  8],
        [ 9, 10, 11]]])

In [35]:
arr = np.arange(27).reshape(3,3,3)
arr

array([[[ 0,  1,  2],
        [ 3,  4,  5],
        [ 6,  7,  8]],

       [[ 9, 10, 11],
        [12, 13, 14],
        [15, 16, 17]],

       [[18, 19, 20],
        [21, 22, 23],
        [24, 25, 26]]])

In [36]:
arr[:,:,2]

array([[ 2,  5,  8],
       [11, 14, 17],
       [20, 23, 26]])

In [37]:
arr[...,2]

array([[ 2,  5,  8],
       [11, 14, 17],
       [20, 23, 26]])

#### Advanced Indexing or Dot(...) based indexing

In [39]:
arr = np.arange(9).reshape(3,3)
arr

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [40]:
arr[[0,1,2],[1,0,0]]

array([1, 3, 6])

##### Boolean Indexing

In [43]:
# selection based on actual values of data in dataset and not based on the integer location
cities = np.array(["delhi","banglaore","mumbai","chennai","bhopal"])
city_data = np.random.randn(5,3)
city_data

array([[-0.90988522,  0.84087335, -2.16698284],
       [-0.28689906, -0.45182407,  0.61697567],
       [ 1.16594749, -0.36020315, -0.90375515],
       [ 0.07836359,  0.93973459,  1.37452886],
       [ 0.61823124, -0.25110024,  1.09060795]])

In [44]:
city_data[cities =="delhi"]

array([[-0.90988522,  0.84087335, -2.16698284]])

In [45]:
city_data[city_data >0]

array([0.84087335, 0.61697567, 1.16594749, 0.07836359, 0.93973459,
       1.37452886, 0.61823124, 1.09060795])

In [47]:
city_data[city_data >0] = 0
city_data


array([[-0.90988522,  0.        , -2.16698284],
       [-0.28689906, -0.45182407,  0.        ],
       [ 0.        , -0.36020315, -0.90375515],
       [ 0.        ,  0.        ,  0.        ],
       [ 0.        , -0.25110024,  0.        ]])

#### Operations on arrays

In [49]:
# creating array with a range of 15 and 3 rows and 5 columns
arr = np.arange(15).reshape(3,5)
arr

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [50]:
# adding 5 to the entire array
arr + 5

array([[ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])

In [51]:
# multiplication
arr * 2

array([[ 0,  2,  4,  6,  8],
       [10, 12, 14, 16, 18],
       [20, 22, 24, 26, 28]])

In [54]:
# creating two arrays and getting output as their sum
#array 1 has 5 rows and 3 columns witha a range of 15
arr1 = np.arange(15).reshape(5,3)
# array 2 has 5 rows and 1 columns and has a range of 5
arr2 = np.arange(5).reshape(5,1)
arr2 + arr1

array([[ 0,  1,  2],
       [ 4,  5,  6],
       [ 8,  9, 10],
       [12, 13, 14],
       [16, 17, 18]])

In [55]:
# getting values of array 1 from the above example
arr1

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [56]:
# getting value of array 2 from the above example
arr2

array([[0],
       [1],
       [2],
       [3],
       [4]])

In [58]:
# assining random values to array 1
arr1 = np.random.randn(5,3)
arr1

array([[-0.2855029 , -0.71270581, -0.73892183],
       [ 0.48779769,  0.75426164,  1.06549313],
       [-1.39662308, -1.15728406,  1.61631553],
       [-0.19077731, -1.32218977,  0.25856881],
       [ 0.68053158, -0.62130551,  0.25494705]])

In [59]:
# Return the fractional and integral parts of an array, element-wise.
# The fractional and integral parts are negative if the given number is negative.
np.modf(arr1)

(array([[-0.2855029 , -0.71270581, -0.73892183],
        [ 0.48779769,  0.75426164,  0.06549313],
        [-0.39662308, -0.15728406,  0.61631553],
        [-0.19077731, -0.32218977,  0.25856881],
        [ 0.68053158, -0.62130551,  0.25494705]]), array([[-0., -0., -0.],
        [ 0.,  0.,  1.],
        [-1., -1.,  1.],
        [-0., -1.,  0.],
        [ 0., -0.,  0.]]))

#### Linear algebra using numpy

In [62]:
# create arrays
A = np.array([[1,2,3],[4,5,6],[7,8,9]])
B = np.array([[9,8,7],[6,5,4],[1,2,3]])
# dot product of A and B
A.dot(B)

array([[ 24,  24,  24],
       [ 72,  69,  66],
       [120, 114, 108]])

In [64]:
A = np.arange(15).reshape(3,5)
# multiplying A with its transpose
A.T

array([[ 0,  5, 10],
       [ 1,  6, 11],
       [ 2,  7, 12],
       [ 3,  8, 13],
       [ 4,  9, 14]])

In [65]:
# Single Value Decomposition decomposes the matrix A effectively into rotations P and Q and the diagonal matrix D
np.linalg.svd(A)

(array([[-0.15425367,  0.89974393,  0.40824829],
        [-0.50248417,  0.28432901, -0.81649658],
        [-0.85071468, -0.3310859 ,  0.40824829]]),
 array([31.74202651,  2.72832424,  0.        ]),
 array([[-0.34716018, -0.39465093, -0.44214167, -0.48963242, -0.53712316],
        [-0.69244481, -0.37980343, -0.06716206,  0.24547932,  0.55812069],
        [-0.3545375 , -0.04008557,  0.87009952, -0.20179231, -0.27368413],
        [-0.36504752,  0.35761581, -0.14090063,  0.6691439 , -0.52081157],
        [-0.37555754,  0.7553172 , -0.15190078, -0.45991989,  0.232061  ]]))

In [69]:
a = np.array([[7,5,-3], [3,-5,2],[5,3,-7]])
b = np.array([16,-8,0])
x = np.linalg.solve(a, b)
x

array([1., 3., 2.])

In [70]:
# Returns True if two arrays are element-wise equal within a tolerance
# The relative difference and the absolute difference are added together to compare against the absolute difference between arr1 and arr2.
np.allclose(np.dot(a, x), b)

True

# Pandas
## Data frames
## Data frames, unlike numpy functions are heterogeneous in nature and are helpful in tabular representataion

In [74]:
# data manipulation
import pandas as pd
d =  [{'city':'Delhi',"data":1000},
      {'city':'Banglaore',"data":2000},
      {'city':'Mumbai',"data":1000}]
# getting data
pd.DataFrame(d)

Unnamed: 0,city,data
0,Delhi,1000
1,Banglaore,2000
2,Mumbai,1000


In [75]:
df = pd.DataFrame(d)

### Reading in data

In [76]:
city_data = pd.read_csv(filepath_or_buffer='simplemaps-worldcities-basic.csv')

In [79]:
city_data.head(n=10)

Unnamed: 0,city,city_ascii,lat,lng,pop,country,iso2,iso3,province
0,Qal eh-ye Now,Qal eh-ye,34.983,63.1333,2997.0,Afghanistan,AF,AFG,Badghis
1,Chaghcharan,Chaghcharan,34.516701,65.250001,15000.0,Afghanistan,AF,AFG,Ghor
2,Lashkar Gah,Lashkar Gah,31.582998,64.36,201546.0,Afghanistan,AF,AFG,Hilmand
3,Zaranj,Zaranj,31.112001,61.886998,49851.0,Afghanistan,AF,AFG,Nimroz
4,Tarin Kowt,Tarin Kowt,32.633298,65.866699,10000.0,Afghanistan,AF,AFG,Uruzgan
5,Zareh Sharan,Zareh Sharan,32.85,68.416705,13737.0,Afghanistan,AF,AFG,Paktika
6,Asadabad,Asadabad,34.866,71.150005,48400.0,Afghanistan,AF,AFG,Kunar
7,Taloqan,Taloqan,36.729999,69.540004,64256.0,Afghanistan,AF,AFG,Takhar
8,Mahmud-E Eraqi,Mahmud-E Eraqi,35.016696,69.333301,7407.0,Afghanistan,AF,AFG,Kapisa
9,Mehtar Lam,Mehtar Lam,34.65,70.166701,17345.0,Afghanistan,AF,AFG,Laghman


In [80]:
city_data.tail()

Unnamed: 0,city,city_ascii,lat,lng,pop,country,iso2,iso3,province
7317,Mutare,Mutare,-18.970019,32.650038,216785.0,Zimbabwe,ZW,ZWE,Manicaland
7318,Kadoma,Kadoma,-18.330006,29.909947,56400.0,Zimbabwe,ZW,ZWE,Mashonaland West
7319,Chitungwiza,Chitungwiza,-18.000001,31.100003,331071.0,Zimbabwe,ZW,ZWE,Harare
7320,Harare,Harare,-17.81779,31.044709,1557406.5,Zimbabwe,ZW,ZWE,Harare
7321,Bulawayo,Bulawayo,-20.169998,28.580002,697096.0,Zimbabwe,ZW,ZWE,Bulawayo


In [81]:
series_es = city_data.lat

In [82]:
type(series_es)

pandas.core.series.Series

In [58]:
series_es[1:10:2]

1    34.516701
3    31.112001
5    32.850000
7    36.729999
9    34.650000
Name: lat, dtype: float64

In [84]:
# data slicing
series_es[:7]

0    34.983000
1    34.516701
2    31.582998
3    31.112001
4    32.633298
5    32.850000
6    34.866000
Name: lat, dtype: float64

In [85]:
series_es[:-7315]

0    34.983000
1    34.516701
2    31.582998
3    31.112001
4    32.633298
5    32.850000
6    34.866000
Name: lat, dtype: float64

In [86]:
# contains rows only upto 6
city_data[:7]

Unnamed: 0,city,city_ascii,lat,lng,pop,country,iso2,iso3,province
0,Qal eh-ye Now,Qal eh-ye,34.983,63.1333,2997.0,Afghanistan,AF,AFG,Badghis
1,Chaghcharan,Chaghcharan,34.516701,65.250001,15000.0,Afghanistan,AF,AFG,Ghor
2,Lashkar Gah,Lashkar Gah,31.582998,64.36,201546.0,Afghanistan,AF,AFG,Hilmand
3,Zaranj,Zaranj,31.112001,61.886998,49851.0,Afghanistan,AF,AFG,Nimroz
4,Tarin Kowt,Tarin Kowt,32.633298,65.866699,10000.0,Afghanistan,AF,AFG,Uruzgan
5,Zareh Sharan,Zareh Sharan,32.85,68.416705,13737.0,Afghanistan,AF,AFG,Paktika
6,Asadabad,Asadabad,34.866,71.150005,48400.0,Afghanistan,AF,AFG,Kunar


In [87]:
# contains only first 5 rows and 4 columns
city_data.iloc[:5,:4]

Unnamed: 0,city,city_ascii,lat,lng
0,Qal eh-ye Now,Qal eh-ye,34.983,63.1333
1,Chaghcharan,Chaghcharan,34.516701,65.250001
2,Lashkar Gah,Lashkar Gah,31.582998,64.36
3,Zaranj,Zaranj,31.112001,61.886998
4,Tarin Kowt,Tarin Kowt,32.633298,65.866699


In [88]:
# selecting specific data, rows and columns to work with, here based on population
city_data[city_data['pop'] > 10000000][city_data.columns[pd.Series(city_data.columns).str.startswith('l')]]

Unnamed: 0,lat,lng
360,-34.602502,-58.397531
1171,-23.55868,-46.62502
2068,31.216452,121.436505
3098,28.669993,77.230004
3110,19.01699,72.856989
3492,35.685017,139.751407
4074,19.442442,-99.130988
4513,24.869992,66.990009
5394,55.752164,37.615523
6124,41.104996,29.010002


In [91]:
# selection of data based on conditions
city_greater_10mil = city_data[city_data['pop'] > 10000000]
city_greater_10mil.rename(columns={'pop':'population'}, inplace=True)
city_greater_10mil.where(city_greater_10mil.population > 15000000)

Unnamed: 0,city,city_ascii,lat,lng,population,country,iso2,iso3,province
360,,,,,,,,,
1171,,,,,,,,,
2068,,,,,,,,,
3098,,,,,,,,,
3110,Mumbai,Mumbai,19.01699,72.856989,15834918.0,India,IN,IND,Maharashtra
3492,Tokyo,Tokyo,35.685017,139.751407,22006299.5,Japan,JP,JPN,Tokyo
4074,,,,,,,,,
4513,,,,,,,,,
5394,,,,,,,,,
6124,,,,,,,,,


In [93]:
df = pd.DataFrame(np.random.randn(8, 3),
columns=['A', 'B', 'C'])

### Operations on dataframes

In [95]:
nparray = df.values
type(nparray)

numpy.ndarray

In [97]:
from numpy import nan
df.iloc[4,2] = nan

In [98]:
df

Unnamed: 0,A,B,C
0,0.492198,-1.066198,-1.406244
1,0.220717,0.408496,2.046897
2,-1.139773,-0.344601,0.060726
3,-1.728301,-0.336897,0.738489
4,-0.460433,1.952699,
5,1.298387,-0.077249,1.022012
6,0.722525,-1.024814,-1.717827
7,-1.205463,-0.651381,-0.154832


In [99]:
df.fillna(0)

Unnamed: 0,A,B,C
0,0.492198,-1.066198,-1.406244
1,0.220717,0.408496,2.046897
2,-1.139773,-0.344601,0.060726
3,-1.728301,-0.336897,0.738489
4,-0.460433,1.952699,0.0
5,1.298387,-0.077249,1.022012
6,0.722525,-1.024814,-1.717827
7,-1.205463,-0.651381,-0.154832


In [100]:
columns_numeric = ['lat','lng','pop']

In [101]:
city_data[columns_numeric].mean()

lat        20.662876
lng        10.711914
pop    265463.071633
dtype: float64

In [102]:
city_data[columns_numeric].sum()

lat    1.512936e+05
lng    7.843263e+04
pop    1.943721e+09
dtype: float64

In [103]:
city_data[columns_numeric].count()

lat    7322
lng    7322
pop    7322
dtype: int64

In [104]:
city_data[columns_numeric].median()

lat       26.792730
lng       18.617509
pop    61322.750000
dtype: float64

In [105]:
city_data[columns_numeric].quantile(0.8)

lat        46.852480
lng        89.900018
pop    269210.000000
Name: 0.8, dtype: float64

In [106]:
city_data[columns_numeric].sum(axis = 1).head()

0      3095.116300
1     15099.766702
2    201641.942998
3     49943.998999
4     10098.499997
dtype: float64

In [107]:
city_data[columns_numeric].describe()

Unnamed: 0,lat,lng,pop
count,7322.0,7322.0,7322.0
mean,20.662876,10.711914,265463.1
std,29.134818,79.044615,828762.2
min,-89.982894,-179.589979,-99.0
25%,-0.32471,-64.788472,17344.25
50%,26.79273,18.617509,61322.75
75%,43.575448,73.103628,200172.6
max,82.483323,179.383304,22006300.0


In [108]:
city_data1 = city_data.sample(3)

### Concatanating data frames

In [109]:
city_data2 = city_data.sample(3)
city_data_combine = pd.concat([city_data1,city_data2])
city_data_combine

Unnamed: 0,city,city_ascii,lat,lng,pop,country,iso2,iso3,province
4770,Ponce,Ponce,18.000386,-66.616642,156484.0,Puerto Rico,PR,PRI,
6514,Cantwell,Cantwell,63.391594,-148.95079,222.0,United States of America,US,USA,Alaska
6660,Brookings,Brookings,44.306765,-96.78803,20313.5,United States of America,US,USA,South Dakota
714,San Borja,San Borja,-14.819626,-66.849961,19640.0,Bolivia,BO,BOL,El Beni
2763,Sisimiut,Sisimiut,66.950008,-53.666606,5227.0,Greenland,GL,GRL,Qeqqata Kommunia
1810,Shangrao,Shangrao,28.470393,117.969998,922421.5,China,CN,CHN,Jiangxi


In [110]:
df1 = pd.DataFrame({'col1': ['col10', 'col11', 'col12', 'col13'],
                    'col2': ['col20', 'col21', 'col22', 'col23'],
                    'col3': ['col30', 'col31', 'col32', 'col33'],
                    'col4': ['col40', 'col41', 'col42', 'col43']},
                   index=[0, 1, 2, 3])

In [111]:
df1

Unnamed: 0,col1,col2,col3,col4
0,col10,col20,col30,col40
1,col11,col21,col31,col41
2,col12,col22,col32,col42
3,col13,col23,col33,col43


In [112]:
df4 = pd.DataFrame({'col2': ['col22', 'col23', 'col26', 'col27'],
                    'Col4': ['Col42', 'Col43', 'Col46', 'Col47'],
                    'col6': ['col62', 'col63', 'col66', 'col67']},
                   index=[2, 3, 6, 7])

pd.concat([df1,df4], axis=1)

Unnamed: 0,col1,col2,col3,col4,col2.1,Col4,col6
0,col10,col20,col30,col40,,,
1,col11,col21,col31,col41,,,
2,col12,col22,col32,col42,col22,Col42,col62
3,col13,col23,col33,col43,col23,Col43,col63
6,,,,,col26,Col46,col66
7,,,,,col27,Col47,col67


In [113]:
country_data = city_data[['iso3','country']].drop_duplicates()

In [114]:
country_data.shape

(223, 2)

In [115]:
country_data.head()

Unnamed: 0,iso3,country
0,AFG,Afghanistan
33,ALD,Aland
34,ALB,Albania
60,DZA,Algeria
111,ASM,American Samoa


In [116]:
del(city_data['country'])

In [117]:
city_data.merge(country_data, 'inner').head()

Unnamed: 0,city,city_ascii,lat,lng,pop,iso2,iso3,province,country
0,Qal eh-ye Now,Qal eh-ye,34.983,63.1333,2997.0,AF,AFG,Badghis,Afghanistan
1,Chaghcharan,Chaghcharan,34.516701,65.250001,15000.0,AF,AFG,Ghor,Afghanistan
2,Lashkar Gah,Lashkar Gah,31.582998,64.36,201546.0,AF,AFG,Hilmand,Afghanistan
3,Zaranj,Zaranj,31.112001,61.886998,49851.0,AF,AFG,Nimroz,Afghanistan
4,Tarin Kowt,Tarin Kowt,32.633298,65.866699,10000.0,AF,AFG,Uruzgan,Afghanistan


# Scikit-learn

In [118]:
from sklearn import datasets
diabetes = datasets.load_diabetes()
X = diabetes.data[:10]
y = diabetes.target

In [119]:
X[:5]

array([[ 0.03807591,  0.05068012,  0.06169621,  0.02187235, -0.0442235 ,
        -0.03482076, -0.04340085, -0.00259226,  0.01990842, -0.01764613],
       [-0.00188202, -0.04464164, -0.05147406, -0.02632783, -0.00844872,
        -0.01916334,  0.07441156, -0.03949338, -0.06832974, -0.09220405],
       [ 0.08529891,  0.05068012,  0.04445121, -0.00567061, -0.04559945,
        -0.03419447, -0.03235593, -0.00259226,  0.00286377, -0.02593034],
       [-0.08906294, -0.04464164, -0.01159501, -0.03665645,  0.01219057,
         0.02499059, -0.03603757,  0.03430886,  0.02269202, -0.00936191],
       [ 0.00538306, -0.04464164, -0.03638469,  0.02187235,  0.00393485,
         0.01559614,  0.00814208, -0.00259226, -0.03199144, -0.04664087]])

In [120]:
y[:10]

array([151.,  75., 141., 206., 135.,  97., 138.,  63., 110., 310.])

In [121]:
feature_names=['age', 'sex', 'bmi', 'bp',
               's1', 's2', 's3', 's4', 's5', 's6']

## Scikit example regression

In [122]:
from sklearn import datasets
from sklearn.linear_model import Lasso

from sklearn import linear_model, datasets
from sklearn.model_selection import GridSearchCV

diabetes = datasets.load_diabetes()
X_train = diabetes.data[:310]
y_train = diabetes.target[:310]

X_test = diabetes.data[310:]
y_test = diabetes.target[310:]

lasso = Lasso(random_state=0)
alphas = np.logspace(-4, -0.5, 30)

scores = list()
scores_std = list()

estimator = GridSearchCV(lasso,
                         param_grid = dict(alpha=alphas))

estimator.fit(X_train, y_train)



GridSearchCV(cv='warn', error_score='raise-deprecating',
       estimator=Lasso(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=0,
   selection='cyclic', tol=0.0001, warm_start=False),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'alpha': array([1.00000e-04, 1.32035e-04, 1.74333e-04, 2.30181e-04, 3.03920e-04,
       4.01281e-04, 5.29832e-04, 6.99564e-04, 9.23671e-04, 1.21957e-03,
       1.61026e-03, 2.12611e-03, 2.80722e-03, 3.70651e-03, 4.89390e-03,
       6.46167e-03, 8.53168e-03, 1.12648e-02, 1.48735e-02, 1.96383e-02,
       2.59294e-02, 3.42360e-02, 4.52035e-02, 5.96846e-02, 7.88046e-02,
       1.04050e-01, 1.37382e-01, 1.81393e-01, 2.39503e-01, 3.16228e-01])},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [123]:
estimator.best_score_

0.4654063759023531

In [124]:
estimator.best_estimator_

Lasso(alpha=0.02592943797404667, copy_X=True, fit_intercept=True,
   max_iter=1000, normalize=False, positive=False, precompute=False,
   random_state=0, selection='cyclic', tol=0.0001, warm_start=False)

In [125]:
estimator.predict(X_test)

array([203.42104984, 177.6595529 , 122.62188598, 212.81136958,
       173.61633075, 114.76145025, 202.36033584, 171.70767813,
       164.28694562, 191.29091477, 191.41279009, 288.2772433 ,
       296.47009002, 234.53378413, 210.61427168, 228.62812055,
       156.74489991, 225.08834492, 191.75874632, 102.81600989,
       172.373221  , 111.20843429, 290.22242876, 178.64605207,
        78.13722832,  86.35832297, 256.41378529, 165.99622543,
       121.29260976, 153.48718848, 163.09835143, 180.0932902 ,
       161.4330553 , 155.80211635, 143.70181085, 126.13753819,
       181.06471818, 105.03679977, 131.0479936 ,  90.50606427,
       252.66486639,  84.84786067,  59.41005358, 184.51368208,
       201.46598714, 129.96333913,  90.65641478, 200.10932516,
        55.2884802 , 171.60459062, 195.40750666, 122.14139787,
       231.72783897, 159.49750022, 160.32104862, 165.53701866,
       260.73217736, 259.77213787, 204.69526082, 185.66480969,
        61.09821961, 209.9214333 , 108.50410841, 141.18

## Neural Network Frameworks

### Theano example 

In [132]:
import numpy
import theano.tensor as T
from theano import function
x = T.dscalar('x')
y = T.dscalar('y')
z = x + y

ModuleNotFoundError: No module named 'theano'

In [133]:
f = function([x, y], z)
f(8, 2)

NameError: name 'function' is not defined

### Tensorflow example

In [102]:
import tensorflow as tf
hello = tf.constant('Hello, TensorFlow!')
sess = tf.Session()
print(sess.run(hello))

b'Hello, TensorFlow!'


### Building a neural network model with Keras

In [103]:
from sklearn.datasets import load_breast_cancer
cancer = load_breast_cancer()

X_train = cancer.data[:340]
y_train = cancer.target[:340]

X_test = cancer.data[340:]
y_test = cancer.target[340:]

import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout

Using TensorFlow backend.


In [150]:
model = Sequential()
model.add(Dense(15, input_dim=30, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

In [151]:
model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

In [152]:
model.fit(X_train, y_train,
          epochs=20,
          batch_size=50)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x1d49ea58be0>

In [153]:
predictions = model.predict_classes(X_test)

 32/229 [===>..........................] - ETA: 1s

In [154]:
from sklearn import metrics

print('Accuracy:', metrics.accuracy_score(y_true=y_test, y_pred=predictions))
print(metrics.classification_report(y_true=y_test, y_pred=predictions))

Accuracy: 0.759825327511
             precision    recall  f1-score   support

          0       0.00      0.00      0.00        55
          1       0.76      1.00      0.86       174

avg / total       0.58      0.76      0.66       229



  'precision', 'predicted', average, warn_for)


### The power of deep learning models

In [155]:
model = Sequential()
model.add(Dense(15, input_dim=30, activation='relu'))
model.add(Dense(15, activation='relu'))
model.add(Dense(15, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

model.fit(X_train, y_train,
          epochs=20,
          batch_size=50)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x1d49ee45908>

In [156]:
predictions = model.predict_classes(X_test)

 32/229 [===>..........................] - ETA: 1s

In [157]:
print('Accuracy:', metrics.accuracy_score(y_true=y_test, y_pred=predictions))
print(metrics.classification_report(y_true=y_test, y_pred=predictions))

Accuracy: 0.912663755459
             precision    recall  f1-score   support

          0       0.78      0.89      0.83        55
          1       0.96      0.92      0.94       174

avg / total       0.92      0.91      0.91       229

