### Lesson 1

--> Pandas  
--> Series  
--> Explicit Indexing using Series  
--> Series with Dictionary  
--> Constructing Series Objects  
--> DataFrame Object  
--> Constructing DataFrames using 2D arrays  

### Pandas 

In [1]:
import pandas as pd  # We are importing the pandas library with an alias of pd 

### Series 

In [2]:
# Series is a 1D array that contains indexed data 

data = pd.Series([0.25,0.50,0.75,1])  # in the given array we can see the output in the form of indexed data 
                                       
print(data)

0    0.25
1    0.50
2    0.75
3    1.00
dtype: float64


In [4]:
print(data.values,'\n')   # This gives all the values present in the data variable 
                        

print(data.index)      # This gives the starting index and the ending index in the array

[0.25 0.5  0.75 1.  ] 

RangeIndex(start=0, stop=4, step=1)


In [6]:
print(data[1],'\n')  # Gives the first value in the data variable


print(data[1:3])   # Gives the 1st and 2nd values present in the data variable 

0.5 

1    0.50
2    0.75
dtype: float64


### Explicit Indexing using Series

In [10]:
data = pd.Series([0.25,0.50,0.75,1,1.25,1.50])
print(a)

a    0.25
b    0.50
c    0.75
d    1.00
e    1.25
f    1.50
dtype: float64


In [11]:
data = pd.Series([0.25,0.50,0.75,1,1.25,1.50],
             index = ['a','b','c','d','e','f'])     # By using index we can add our own index values 

print(data)

a    0.25
b    0.50
c    0.75
d    1.00
e    1.25
f    1.50
dtype: float64


In [15]:
print(data['a'],'\n')  # This gives the value of data that is present at index 'a' -> 0.25

print(data['c'])  # This gives the value of the data that is present at index 'c' -> 0.75

0.25 

0.75


In [16]:
data = pd.Series([0.25,0.50,0.75,1,1.25,1.50],
             index = [4,'b',6,'d',10,'f'])  # We can even modify the values as we want 

print(data)

4     0.25
b     0.50
6     0.75
d     1.00
10    1.25
f     1.50
dtype: float64


In [17]:
print(data[4],'\n')  # This gives the value of data that is present at index 4 -> 0.25

print(data['f'])  # This gives the value of the data present at index 'f' -> 1.50

0.25 

1.5


### Series with Dictionary

In [18]:
population_dict = {
    'Hyderabad' : 1399229,
    'Banglore': 983333,
    'Delhi' : 8700000,
    'Mumbai' : 21000000
}                                            # We created a dictionary of population for different states in India

population = pd.Series(population_dict)     # We converted the dictionary to a pandas series

print(population)

Hyderabad     1399229
Banglore       983333
Delhi         8700000
Mumbai       21000000
dtype: int64


In [21]:
print(population,'\n')  # Gives the values present in the variable population

print('Population of Delhi-',population['Delhi'],'\n')  # Gives the population of Delhi only [Indexing]

print(population['Hyderabad':'Delhi'])   # Gives the values from Hyderabad to Delhi inclusive [Slicing]

Hyderabad     1399229
Banglore       983333
Delhi         8700000
Mumbai       21000000
dtype: int64 

Population of Delhi- 8700000 

Hyderabad    1399229
Banglore      983333
Delhi        8700000
dtype: int64


### Constructing Series Objects

In [22]:
data = pd.Series([2,3,4])   # This stores the values 2,3,4

print(data)

0    2
1    3
2    4
dtype: int64


In [23]:
data = pd.Series(100,index=[1,2,3,4,5]) # This stores 5 values of 100 with different index

print(data)

1    100
2    100
3    100
4    100
5    100
dtype: int64


In [25]:
data = pd.Series({1:'a',3:'d',2:'c'})  # This stores the dictionary values 

print(data)

1    a
3    d
2    c
dtype: object


In [27]:
data = pd.Series({1:'a',3:'c',2:'b'},index=[2,3])  # This gives the values of explicit indexes 

print(data)

2    b
3    c
dtype: object


### DataFrame Object

In [31]:
area_dictionary = {
    'Rajasthan':342239,
    'Hyderabad':112077,
    'Mumbai':307713,
    'Uttar Pradesh': 240928,
    'Banglore':191791,
}                                # We are creating a dictionary of areas in different states 

area = pd.Series(area_dictionary)      # We are converting the area_dictionary to a Pandas Series object and storing it inside a variable area  
print(area)

Rajasthan        342239
Hyderabad        112077
Mumbai           307713
Uttar Pradesh    240928
Banglore         191791
dtype: int64


In [32]:
states = pd.DataFrame({'population':population,
                      'area':area})    # As we have already created a population and area in Pandas Series Object 
                                       # We are now converting it into a DataFrame and storing it inside states variable
print(states)

               population      area
Banglore         983333.0  191791.0
Delhi           8700000.0       NaN
Hyderabad       1399229.0  112077.0
Mumbai         21000000.0  307713.0
Rajasthan             NaN  342239.0
Uttar Pradesh         NaN  240928.0


In [33]:
print(states.index)   # states.index provides all the index values of states

Index(['Banglore', 'Delhi', 'Hyderabad', 'Mumbai', 'Rajasthan',
       'Uttar Pradesh'],
      dtype='object')


In [34]:
print(states.columns)  # states.columns provides all the column values present in the states 

Index(['population', 'area'], dtype='object')


In [35]:
print(states['area'])  # states['area'] provides all the index values and the area column only

Banglore         191791.0
Delhi                 NaN
Hyderabad        112077.0
Mumbai           307713.0
Rajasthan        342239.0
Uttar Pradesh    240928.0
Name: area, dtype: float64


In [36]:
print(states['population']) # states['population'] provides all the index values and the population column only 

Banglore           983333.0
Delhi             8700000.0
Hyderabad         1399229.0
Mumbai           21000000.0
Rajasthan               NaN
Uttar Pradesh           NaN
Name: population, dtype: float64


In [42]:
strikers_dict = {
    'Portugal': 'Ronaldo',
    'Egypt':'Salah',
    'Brazil':'Neymar',
    'England':'Rooney',
    'Spain':'Morata'
}                                 # We created a Strikers dictionary 

strikers = pd.Series(strikers_dict)  # Over here we convert the dictionary into a Pandas Series and store it in the variable strikers 
print(strikers)

Portugal    Ronaldo
Egypt         Salah
Brazil       Neymar
England      Rooney
Spain        Morata
dtype: object


In [44]:
goals_dict={
    'Portugal': 900 ,
    'Egypt':300,
    'Brazil':400,
    'England':366,
    'Spain':200
}                           # Here we are making a dictionary of goals 

goals = pd.Series(goals_dict)  # Over here we are converting the dictionary to Pandas Series and storing it in the variable goals
print(goals)

Portugal    900
Egypt       300
Brazil      400
England     366
Spain       200
dtype: int64


In [46]:
Football_dict = {
    'Players':strikers,
    'Total Goals': goals
}   # We are combining the dictionary of values that has already been created 

Football = pd.DataFrame(Football_dict)  # We are converting the combined dictionary into a Pandas DataFrame Object and storing it in the variable Football
print(Football)

          Players  Total Goals
Portugal  Ronaldo          900
Egypt       Salah          300
Brazil     Neymar          400
England    Rooney          366
Spain      Morata          200


### Constructing DataFrames using 2D arrays

In [52]:
import pandas as pd
import numpy as np

data = pd.DataFrame(np.random.randint(1,100),
                   columns =[0,1],
                   index = ['a','b','c'])      # In here we are using the 
                                               # np.random.randint(1,100) --> which generates a random number between 1-100
                                                # columns = [0,1]  -> With this we are writing the column names to be 0 and 1 
                                                # index = ['a','b','c']  -> With this we are writing the index values to be a,b and c

print(data)

    0   1
a  33  33
b  33  33
c  33  33
