# <font color=darkblue> Part 1: Exploring NumPy </font>

In [1]:
# Load library
import numpy as np

# Create a vector as a row: a 1-D array built from a flat Python list
vector_row = np.array([1, 2, 3])
vector_row

array([1, 2, 3])

In [None]:
# Create a vector as a column: a 2-D array with one single-element row per
# entry, i.e. 3 rows and 1 column
vector_column = np.array([[1],[2],[3]])
vector_column

array([[1],
       [2],
       [3]])

In [None]:
# Create a matrix (2-D array) with 3 rows and 2 columns from a nested list
matrix = np.array([[1, 2],[1, 2],[1, 2]])
matrix

array([[1, 2],
       [1, 2],
       [1, 2]])

In [None]:
# Type of the matrix object: every NumPy array is an instance of numpy.ndarray
type(matrix)

numpy.ndarray

In [4]:
# Initializing a 3x4 array with all '1' values, stored as 16-bit integers
np.ones( (3,4), dtype=np.int16 )

array([[1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1]], dtype=int16)

In [None]:
# Initializing a 3x4 array in which every element is the given value (0.11)
np.full( (3,4), 0.11 )

array([[0.11, 0.11, 0.11, 0.11],
       [0.11, 0.11, 0.11, 0.11],
       [0.11, 0.11, 0.11, 0.11]])

In [6]:
# Evenly spaced values from 10 up to (but not including) 30, in steps of 5
np.arange( 10, 30, 5 )

array([10, 15, 20, 25])

In [None]:
# arange also accepts a non-integer step; the stop value (2) is still excluded.
# With float steps, rounding error can change the number of elements returned,
# so np.linspace is usually the safer choice when the exact count matters.
np.arange( 0, 2, 0.3 )

array([0. , 0.3, 0.6, 0.9, 1.2, 1.5, 1.8])

In [None]:
# Generate a 2x3 array of random samples drawn uniformly from [0, 1).
# No seed is set in this cell, so the values differ on every run.
np.random.rand(2,3)

array([[0.81267613, 0.47776384, 0.47755178],
       [0.43841073, 0.40484431, 0.27627684]])

### Some other important attributes

Some of the important attributes (and one method) of a NumPy array are:
1.	`ndim`: the number of dimensions (axes) of the array
2.	`shape`: a tuple of integers giving the size of the array along each dimension
3.	`size`: the total number of elements in the NumPy array
4.	`dtype`: the type of the elements in the array, e.g., int64, float64, or a character/string type
5.	`itemsize`: the size in bytes of each item
6.	`reshape()`: a method (not an attribute) that returns the same data arranged in a new shape

### Examples of Indexing

- 	`A[2:5]` selects items 2 to 4 (the stop index 5 is excluded). Indexing in NumPy arrays starts from 0
- 	`A[2::2]` selects every second item from index 2 to the end (items 2, 4, 6, ...)
- 	`A[::-1]` returns the array in reverse order
- 	`A[1:]` selects everything from row 1 to the end


In [13]:
# 5x3 array of uniform random samples in [0, 1), used by the attribute and
# indexing examples that follow
A=np.random.rand(5,3)
A

array([[0.37259624, 0.29676717, 0.12104007],
       [0.48208606, 0.48976124, 0.7383381 ],
       [0.23881896, 0.2349791 , 0.73767858],
       [0.46414017, 0.59635515, 0.47982603],
       [0.40697569, 0.3299364 , 0.13739536]])

In [23]:
# Size of A along each dimension, as a (rows, columns) tuple
A.shape

(5, 3)

In [24]:
# Total number of elements in A (rows * columns)
A.size

15

In [22]:
# Every third row of A starting at row 1; with 5 rows that is rows 1 and 4
A[1::3]

array([[0.48208606, 0.48976124, 0.7383381 ],
       [0.40697569, 0.3299364 , 0.13739536]])

# <font color=darkblue> Part 2: Exploring pandas </font>

In [26]:
#Import library
import pandas as pd

# Creating a dataframe from a dict of Series (one Series per column).
# Each Series carries its own index labels, and DataFrame aligns the columns
# by label rather than by position: the label order inside a Series does not
# matter, and a label missing from a Series becomes NaN in that column.
people_dict = dict(
    weight=pd.Series([68, 83, 112], index=["alice", "bob", "charles"]),
    birthyear=pd.Series([1984, 1985, 1992], index=["bob", "alice", "charles"]),
    children=pd.Series([0, 3], index=["charles", "bob"]),
    hobby=pd.Series(["Biking", "Dancing"], index=["alice", "bob"]),
)
people = pd.DataFrame(data=people_dict)
people

Unnamed: 0,weight,birthyear,children,hobby
alice,68,1985,,Biking
bob,83,1984,3.0,Dancing
charles,112,1992,0.0,


In [27]:
# Selecting only people born before 1990: the comparison produces a boolean
# Series, which is then used as a row mask on the dataframe
people[people["birthyear"] < 1990]

Unnamed: 0,weight,birthyear,children,hobby
alice,68,1985,,Biking
bob,83,1984,3.0,Dancing


### Some essential methods in dataframes:

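1.	head(): returns the first 5 rows of the dataframe by default (pass `n` for a different number)
2.	tail(): returns the last 5 rows of the dataframe by default
3.	info(): prints a summary of the dataframe (index, column names, non-null counts, dtypes, memory usage)
4.	describe(): gives summary statistics (count, mean, std, min, quartiles, max) for each numeric column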


In [28]:
# First rows of the dataframe (at most 5 by default; people has only 3 rows)
people.head()

Unnamed: 0,weight,birthyear,children,hobby
alice,68,1985,,Biking
bob,83,1984,3.0,Dancing
charles,112,1992,0.0,


In [29]:
# Print the index range, per-column non-null counts and dtypes, and memory usage
people.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, alice to charles
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   weight     3 non-null      int64  
 1   birthyear  3 non-null      int64  
 2   children   2 non-null      float64
 3   hobby      2 non-null      object 
dtypes: float64(1), int64(2), object(1)
memory usage: 200.0+ bytes


# <font color=darkblue> Part 3: Writing custom functions </font>

In [None]:
#Importing libraries
import os
import tarfile
import urllib
import urllib.request

#Defining URL and PATH constants
# DATA_URL: remote text file that fetch_data_from_url downloads by default
# DATA_PATH: local directory (relative to the current working directory)
#            in which the downloaded file is stored
DATA_URL = "http://www.realclimate.org/data/senators_sunspots.txt"
DATA_PATH = os.path.join("datasets", "demo1DataSet")

In [None]:
# Show the joined path (the separator depends on the operating system)
print(DATA_PATH)

datasets/demo1DataSet


In [None]:
# My custom function to fetch data from a web URL
def fetch_data_from_url(data_url=DATA_URL, data_path=DATA_PATH):
    """Download a text file into ``data_path`` and print its contents.

    The file is always saved as ``demo1.txt`` inside ``data_path``; an
    existing file with that name is overwritten.

    Parameters
    ----------
    data_url : str
        URL of the text file to download.
    data_path : str
        Directory in which to store the file. It is created (including any
        missing parent directories) if it does not exist yet.

    Returns
    -------
    None
        The file contents are printed, not returned.

    Raises
    ------
    urllib.error.URLError
        If the download fails.
    OSError
        If the directory or file cannot be created or read.
    """
    # exist_ok=True replaces the separate isdir() check and avoids failing
    # if the directory appears between the check and the creation.
    os.makedirs(data_path, exist_ok=True)
    txt_path = os.path.join(data_path, "demo1.txt")
    urllib.request.urlretrieve(data_url, txt_path)
    # Context manager guarantees the file handle is closed after reading.
    with open(txt_path, "r") as txt_file:
        print(txt_file.read())

In [None]:
# Calling the custom fetch function with its default URL and target directory;
# it downloads the file and prints its full contents
fetch_data_from_url()

Republicans in Senate  (Since 1959)
Year
Number  
Yr   Republicans_in_Senate
1960 36		
1962 34		
1964 32		
1966 36		
1968 43		
1970 44		
1972 42		
1974 38		
1976 38		
1978 41		
1980 53		
1982 54		
1984 53		
1986 45		
1988 45 
1990 44 
1992 43  
1994 52  
1996 55   
1998 55  
2000 50 
2002 51 
2004 55  
2006 49 

YEARLY MEAN SUNSPOT NUMBERS
Year
Sunspot Number
Yr  Sunspot_Number 
1960 112.3  
1961  53.9  
1962  37.6  
1963  27.9  
1964  10.2  
1965  15.1  
1966  47.0  
1967  93.8  
1968 105.9 
1969 105.5 
1970 104.5 
1971  66.6  
1972  68.9  
1973  38.0  
1974  34.5  
1975  15.5  
1976  12.6  
1977  27.5  
1978  92.5  
1979 155.4 
1980 154.6 
1981 140.4 
1982 115.9 
1983  66.6  
1984  45.9  
1985  17.9  
1986  13.4  
1987  29.4  
1988 100.2 
1989 157.6 
1990 142.6 
1991 145.7 
1992  94.3  
1993  54.6  
1994  29.9  
1995  17.5  
1996   8.6   
1997  21.5  
1998  64.3  
1999  93.3  
2000 119.6 
2001 111.0 
2002 104.0 
2003  63.7  
2004  40.4  
2005  29.8  
2006  15.2  

Data sources:						

In [None]:
# Import libraries
import pandas as pd

# My custom function to load the downloaded data file into a dataframe
def load_data(data_path=DATA_PATH, **read_csv_kwargs):
    """Read ``demo1.txt`` from ``data_path`` with ``pandas.read_csv``.

    Parameters
    ----------
    data_path : str
        Directory that contains ``demo1.txt``.
    **read_csv_kwargs
        Optional keyword arguments forwarded unchanged to ``pandas.read_csv``
        (for example ``sep``, ``skiprows``, ``nrows``). With none given, the
        default comma-separated parsing is used; the downloaded file is
        whitespace-separated with free-text header lines, so the default
        parse yields a single column of raw text lines.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of the file.
    """
    # Print the directory actually being read (the argument, not the global
    # DATA_PATH) so the output is correct when a custom path is passed.
    print(data_path)
    txt_path = os.path.join(data_path, "demo1.txt")
    return pd.read_csv(txt_path, **read_csv_kwargs)

In [None]:
# Verifying my data: load the downloaded file and preview the first rows.
# With the default read_csv settings the whitespace-separated text is not
# split into columns, so each row shows one raw line of the file.
data = load_data()
data.head()

datasets/demo1DataSet


Unnamed: 0,Republicans in Senate (Since 1959)
0,Year
1,Number
2,Yr Republicans_in_Senate
3,1960 36\t\t
4,1962 34\t\t
