In [7]:
# Import statements and basic sample datas

import numpy as np
import pandas as pd
from urllib.request import urlopen
from datascience import *


# These lines set up graphing capabilities.
import matplotlib # Library for Data visualization and plot   
%matplotlib inline 
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
import warnings
warnings.simplefilter('ignore', FutureWarning)

# url address of US census website. (Data soiurce)
census_url = 'https://www2.census.gov/programs-surveys/' 

# url-link of 2010-2015 population data csv file.
pop_url = 'https://www2.census.gov/programs-surveys/popest/datasets/2010-2015/national/asrh/nc-est2015-agesex-res.csv'
pop_tbl = Table().read_table(pop_url) # Create a table based on the csv file.

# Refined version of US population table 
ref_pop_tbl = pop_tbl.select('SEX', 'AGE', 4, 9).relabeled(2, '2010').relabeled(3, '2015')
ref_pop_tbl 

SEX,AGE,2010,2015
0,0,3951330,3978038
0,1,3957888,3968564
0,2,4090862,3966583
0,3,4111920,3974061
0,4,4077551,4020035
0,5,4064653,4018158
0,6,4073013,4019207
0,7,4043046,4148360
0,8,4025604,4167887
0,9,4125415,4133564


# Data Types and Arrays

## Data Types

### Text
   
   > Represented by a string value (a sequence of characters) in Python.
   
   - Single quotes (') and double quotes (") are both valid, but the types of opening and closing quotation marks must match.
   
   - 

### String methods

####  `.replace( )`
  - original_string.replace(old_substring, new_substring)
 
#### `int( )`, `float( )`, `str( )`
   - int(str | float)
   > Converts a string of digits or a float to an integer ("int") value|
   
   - float(str | int) 
   > Converts a string of digits (perhaps with a decimal point) or an int to a decimal ("float") value
   
   - str(any_value)  
   > Converts any value to a string

#### `len( )`
> takes a single string as its argument and returns the number of characters (including spaces) in the string



## Array and Table

### Array 

   > collection of values of the same type. 
   
  - Index 
     > 

### Array methods

#### `make_array( )`

- make_array( elements ... )
 > Each argument you pass to `make_array` will be in the array it returns

#### `np.array( )`

- np.array([elements ... ])
 > Constructor method of the Array data structure through Numpy library

#### `np.arange( )`

- np.arange(start, stop, step)
 > Evaluates to an array with all the numbers starting at start and counting up by step, stopping before stop is reached.
 
 
#### `np.mean( )`

- np.mean(p_arr)
 > Returns the mean of p_arr 

### Table
 
 > 
 

### Table operations

#### `Table( ).with_columns ( )`

 - Table().with_columns(p_label, param_arr, ... )
  
  > put arrays of the same length into a Table, a 2-dimensional type of dataset
      


#### `.read_table( )`

 - Table.read_table("CSV_FILE")
  
  > Loading a table from a .csv file

#### `.column( )`

 - Table.column(p_label) 
    
  > returns the values in p_label column as an array.
  
  
#### `.take( )`

- Table.take(row_idx_arr)
 
 > Returns a new table with only those rows in the row_idx_arr.
 
   - row_idx_arr
   > Array of row indices to take from the Table.  
   
   
- Table.take(np.arange(start, stop, step))
 > Use .take() in conjunction with np.arange() to take the first few rows of a table.
   
 
     

### `where` Method

#### `.where( )`

 - tbl.where( bool_arr )
 
  > Return new table of rows whose corresponding indices in bool_arr are True.
  
 - tbl.where( label, `condition`)
 
  > Return new table of rows that are True after the `condition`.
  
   - ***are*** `.conditinal_func( )`
       
       - .{above | below}\_or_equal( )
       
       - .contained_in(param_arr)
           
       - . 
    


# Functions and Visualizations

## Functions

### Function
 

#### `def` Statement

```
 [def] [func_Name](parameter):
    """[Documentation]""" 
    [Body]
    [return Statement]    
``` 

# Previous notes below here.

### 2) Series :  

   > 1. Series : one-dimensional labelled array capable of holding data of any type 
   > 2. Possible to create a Series from Lists and Dictionaries
   > 3. Labels : need not be unique but must be a hashable
   > 4. Index : The axis labels
   

#### Fundamental methods of Series

1. pd.Series(param_data, index_array)
  
  > 1. Constructor method of the Series data structure from Pandas
  > 2. Index_array (Optional) : Array of Indices of datas in param_datas
  > 3. Possible sources of param_data : Dictionary | List | A single element

2. pd.Series(single_element, index_array)

  > 1. Constructor method of the Series data structure from Pandas
  > 2. num_rows of Series == Number of elements in index_array 
  > 3. 

#### Examples of Series

In [3]:
# Array of an indices of data from param_data to create a Series.
idx_arr = number_array * 2


# Dictionary data structure of data for Series 
data_dict = {'Element_1' : 10, 'Element_2' : 20, 'Element_3' : 30} 


# Constructor method of the Series data structure from Pandas. 
# Using idx_arr, an array of indices of datas, as indexes on the leftmost of Series 
num_ser1 = pd.Series(boolean_array, idx_arr)
num_ser1 


# Constructor method of the Series data structure from Pandas. 
# Dictionary data structure of data as a paramenter to create a Series 
dic_ser = pd.Series(data_dict)
dic_ser


# Constructor method of the Series data structure from Pandas. 
# Single data as a paramenter to create a scalar Series according to idx_arr.
scalar_ser = pd.Series(7, idx_arr)
scalar_ser

2     7
4     7
6     7
8     7
10    7
dtype: int64

### 3) DataFrame :

1. Table type data structure from Data C8 Courses

#### Fundamental methods of DataFrame

1. pd.DataFrame({'Label_1' : Series_1, 'Label_2' : Series_2})
    > Constructor of the DataFrame data structure of Pandas
        
2. tmp_dframe.apply(param_func)
    > param_func applied to the elements of 'param_Label' column from tmp_dframe
    

#### Examples of DataFrame

In [4]:
## Examples of Array # Constructor of DataFrame structure from Pandas 
num_bool_tbl = pd.DataFrame({'Numbers' : number_array, 'Booleans' : boolean_array})
print(num_bool_tbl)

# Apply function on the elements of the column of the table data structure.
sum_num_tbl = num_tbl.apply(np.sum)
sum_num_tbl


   Numbers  Booleans
0        1     False
1        2     False
2        3     False
3        4      True
4        5      True


Numbers    15
dtype: int64

## [B] Table Manipulation

### Fundamental Methods about table manipulation

#### 1. TABLE_tmp.show(param_num)
   
   > 1. Displays param_num amount of the contents of the TABLE_tmp.
   
   **[PANDAS] ~ display(DataFrame_tmp)** > {i.e. TABLE_tmp == DataFrame_tmp} 
   


In [9]:
# param_num : Number of data to show
param_num = 2

# Displays param_num amount of the contents of the TABLE_tmp.
display(num_tbl)

Unnamed: 0,Numbers
0,1
1,2
2,3
3,4
4,5


#### 2. TABLE_tmp.join

   > 1. 