In [3]:
import pandas as pd
import numpy as np
# creating from a list literal; with no index= given, the rows are labelled 0, 1, 2
days = pd.Series(['Mon', 'Tue', 'Wed'])
print(days)

0    Mon
1    Tue
2    Wed
dtype: object


In [4]:
# creating from numpy array
# (despite its name, days_lst is a NumPy array in this cell, not a list)
days_lst = np.array(['Mon', 'Tue', 'Wed'])
pd_days = pd.Series(days_lst)
print(pd_days)

0    Mon
1    Tue
2    Wed
dtype: object


In [5]:
# creating from regular python list
# days_lst and pd_days are rebound here; the printed Series matches the array-based one
days_lst = ['Mon', 'Tue', 'Wed']
pd_days = pd.Series(days_lst)
print(pd_days)

0    Mon
1    Tue
2    Wed
dtype: object


In [55]:
# creating with explicit string labels via index=
# NOTE: this rebinds days_lst, which held a plain list earlier, to a Series
days_lst = pd.Series(['Mon', 'Tue', 'Wed'], index=['a', 'b', 'c'])

In [7]:
# display the Series; its rows carry the labels 'a', 'b', 'c'
days_lst

a    Mon
b    Tue
c    Wed
dtype: object

In [8]:
# creating from dictionary
# the dict keys become the index labels and the dict values become the data
d1 = pd.Series({'a':'Monday', 'b':'Tuesday', 'c':'Wednesday'})
d1

a       Monday
b      Tuesday
c    Wednesday
dtype: object

In [9]:
# first element by position; d1 is labelled 'a'/'b'/'c', so use .iloc for an
# integer position instead of relying on []'s deprecated positional fallback
d1.iloc[0]

'Monday'

In [10]:
# integer slice: taken by position, so this returns everything from the second element on
d1[1:]

b      Tuesday
c    Wednesday
dtype: object

In [11]:
# label-based lookup using the index key 'c'
d1['c']

'Wednesday'

In [56]:
# first element by position; d1 is labelled 'a'/'b'/'c', so use .iloc for an
# integer position instead of relying on []'s deprecated positional fallback
d1.iloc[0]

'Monday'

# DataFrame

In [12]:
# a DataFrame built with no arguments has no columns and no index
print(pd.DataFrame())

Empty DataFrame
Columns: []
Index: []


In [57]:
# the same empty frame, evaluated as the cell's last expression instead of being printed
pd.DataFrame()

In [20]:
# create dataframe from dict
# each key becomes a column; index= supplies the row labels 2, 4, 6, 8
# NOTE: Population and Age are given as strings, so both columns are stored
# with object dtype rather than as numbers
df_dict = {'Country':['Ghana', 'Nigeria', 'Togo', 'Kenya'],
           'Capital':['Accra', 'Abuja', 'Lome', 'Nairobi'],
           'Population':['100000', '140000', '67000', '87000'],
           'Age':['60', '70', '75', '80'],
          }
df = pd.DataFrame(df_dict, index=[2,4,6,8])
df

Unnamed: 0,Country,Capital,Population,Age
2,Ghana,Accra,100000,60
4,Nigeria,Abuja,140000,70
6,Togo,Lome,67000,75
8,Kenya,Nairobi,87000,80


In [16]:
# creating from regular python list
# each inner list is one row; columns= names the fields and index= labels the rows
# Population and Age are integers here, unlike the string values used for df
df_list = [['Ghana', 'Accra', 12000, 87],
           ['Nigeria', 'Abuja', 45000, 67],
           ['Togo', 'Lome', 23000, 35],
           ['Kenya', 'Nairobi', 10000, 57],
          ]
df1 = pd.DataFrame(df_list, columns=['Country', 'Capital', 'Population', 'Age'], index=[1,2,3,4])
df1

Unnamed: 0,Country,Capital,Population,Age
1,Ghana,Accra,12000,87
2,Nigeria,Abuja,45000,67
3,Togo,Lome,23000,35
4,Kenya,Nairobi,10000,57


#### retrieving values using at, iat, iloc and loc 

In [21]:
# select the row at integer position 3 (the fourth row, whose index label is 8)
df.iloc[3]

Country         Kenya
Capital       Nairobi
Population      87000
Age                80
Name: 8, dtype: object

In [22]:
# positional slice: rows from position 2 through the end (labels 6 and 8)
df.iloc[2:]

Unnamed: 0,Country,Capital,Population,Age
6,Togo,Lome,67000,75
8,Kenya,Nairobi,87000,80


In [24]:
# select a single column by name; the result is a Series named 'Country'
df['Country']

2      Ghana
4    Nigeria
6       Togo
8      Kenya
Name: Country, dtype: object

In [25]:
# .at reads one cell addressed by row label (6) and column name
df.at[6, 'Country'] # select using "at"

'Togo'

In [31]:
# .iat reads one cell addressed purely by integer positions
df.iat[2, 1] # i.e. row position 2, column position 1 (both 0-based)

'Lome'

In [34]:
# row position 3, column position 3 (Age); the value comes back as the string '80'
df.iat[3, 3]

'80'

In [35]:
# same kind of positional lookup on df1, where Age holds integers
df1.iat[2,3]

35

In [36]:
# select the 'Capital' column of df1 by name
df1['Capital']

1      Accra
2      Abuja
3       Lome
4    Nairobi
Name: Capital, dtype: object

In [38]:
# 'Age' holds digit strings, so a plain .sum() concatenates them ('60707580');
# cast to int first to get the arithmetic total
df['Age'].astype(int).sum()

285

In [39]:
df.mean()

Population    2.500004e+20
Age           1.517690e+07
dtype: float64

In [40]:
# every column of df is object dtype, so describe() reports count/unique/top/freq
# rather than numeric statistics
df.describe()

Unnamed: 0,Country,Capital,Population,Age
count,4,4,4,4
unique,4,4,4,4
top,Togo,Lome,87000,75
freq,1,1,1,1


In [41]:
# df1 has numeric Population and Age, so describe() reports count, mean, std and
# quartiles for those two columns only
df1.describe()

Unnamed: 0,Population,Age
count,4.0,4.0
mean,22500.0,61.5
std,16051.998837,21.625602
min,10000.0,35.0
25%,11500.0,51.5
50%,17500.0,62.0
75%,28500.0,72.0
max,45000.0,87.0


##### missing data 

In [45]:
# build a small frame with np.nan placeholders to demonstrate missing-data handling:
# one missing value each in Name, Experience and Height
df_dict2 = {'Name':['James','Yemen', 'Caro', np.nan],
            'Profession':['Researcher','Trader', 'Teacher', 'Doctor'],
            'Experience':[12, np.nan, 10, 8],
            'Height':[np.nan, 175, 180, 150],
           }
df3 = pd.DataFrame(df_dict2, index=[1,2,3,4])
df3

Unnamed: 0,Name,Profession,Experience,Height
1,James,Researcher,12.0,
2,Yemen,Trader,,175.0
3,Caro,Teacher,10.0,180.0
4,,Doctor,8.0,150.0


### Flag each cell that holds a missing value as True

In [46]:
# boolean frame of the same shape as df3: True where a value is missing, False elsewhere
df3.isnull()

Unnamed: 0,Name,Profession,Experience,Height
1,False,False,False,True
2,False,False,True,False
3,False,False,False,False
4,True,False,False,False


In [47]:
# remove rows with missing values
# the result is only displayed, not assigned, so df3 itself is not rebound here;
# only the row labelled 3 has no missing value
df3.dropna()

Unnamed: 0,Name,Profession,Experience,Height
3,Caro,Teacher,10.0,180.0


In [48]:
# Fruit counts keyed by column name; each list holds one value per row.
data = dict(
    apples=[2, 4, 6, 4],
    oranges=[0, 5, 3, 1],
)
p = pd.DataFrame(data)
p

Unnamed: 0,apples,oranges
0,2,0
1,4,5
2,6,3
3,4,1


In [49]:
# .loc selects by index label; p has the default integer index, so label 0 is the first row
p.loc[0]

apples     2
oranges    0
Name: 0, dtype: int64

In [50]:
def header(msg):
    """Print a section banner: a rule of 50 dashes, then ``msg`` in square brackets.

    ``msg`` must be a string, because it is concatenated with the brackets.
    """
    print('-' * 50)
    bracketed = '[' + msg + ']'
    print(bracketed)

In [53]:
# print a banner, then rebuild p from the same data dict and display it
# (the frame is identical to the one created from data earlier)
header("1. load hard coded data into dataframe")
p = pd.DataFrame(data)
p

--------------------------------------------------
[1. load hard coded data into dataframe]


Unnamed: 0,apples,oranges
0,2,0
1,4,5
2,6,3
3,4,1


In [52]:
# rebuilds p from data once more; the cell ends with an assignment, so nothing is displayed
p = pd.DataFrame(data)

### Converting back to csv, json or sql

In [58]:
# write df to 'myData.csv' (a relative path, so it lands in the current working directory)
df.to_csv('myData.csv')

In [59]:
# write df to 'myData.json' (a relative path, so it lands in the current working directory)
df.to_json('myData.json')

### getting info about the data
###### .info() should be the first command you run after loading your data

In [60]:
# prints the index range, each column's non-null count and dtype, and the memory usage
df.info() #provides the essential details about your dataset

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4 entries, 2 to 8
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Country     4 non-null      object
 1   Capital     4 non-null      object
 2   Population  4 non-null      object
 3   Age         4 non-null      object
dtypes: object(4)
memory usage: 320.0+ bytes


In [61]:
# display the full frame (only four rows, so no need for head())
df

Unnamed: 0,Country,Capital,Population,Age
2,Ghana,Accra,100000,60
4,Nigeria,Abuja,140000,70
6,Togo,Lome,67000,75
8,Kenya,Nairobi,87000,80


In [62]:
# (number of rows, number of columns)
df.shape

(4, 4)