In [2]:
import pandas as pd
import numpy as np

In [3]:
# Two-level MultiIndex spelled out pair by pair, then used as the index of a
# four-element Series.
idx = pd.MultiIndex.from_tuples([
    ('a', 1), ('a', 2),
    ('b', 1), ('b', 2),
])
vals = [100, 200, 300, 400]

# NOTE: despite the name, `df` is a Series in this cell.
df = pd.Series(data=vals, index=idx)

df

a  1    100
   2    200
b  1    300
   2    400
dtype: int64

In [4]:
# Cross-section on the inner level: keep every outer label whose inner label is 1.
df.xs(1, level=1)

a    100
b    300
dtype: int64

In [5]:
# Pivot the innermost index level (the default, level=-1) into columns.
df.unstack(level=-1)

Unnamed: 0,1,2
a,100,200
b,300,400


In [6]:
# Assemble a two-column frame: 'tot' reuses the MultiIndexed Series `df`
# (its index becomes the frame's row index), 'ayhaga' is a plain list.
column_data = {
    'tot': df,
    'ayhaga': [99, 88, 77, 66],
}
new_df = pd.DataFrame(column_data)

new_df

Unnamed: 0,Unnamed: 1,tot,ayhaga
a,1,100,99
a,2,200,88
b,1,300,77
b,2,400,66


In [7]:

# Passing two parallel label lists as `index` yields a two-level MultiIndex.
outer_labels = ['a', 'a', 'b', 'b']
inner_labels = [1, 2, 1, 2]

# 4 x 2 block of unseeded uniform samples, so the values change on every run.
df = pd.DataFrame(
    data=np.random.rand(4, 2),
    index=[outer_labels, inner_labels],
    columns=['x', 'y'],
)

df

Unnamed: 0,Unnamed: 1,x,y
a,1,0.781306,0.943062
a,2,0.230391,0.671136
b,1,0.140736,0.485831
b,2,0.848635,0.537521


In [9]:
# A dict keyed by tuples is turned into a MultiIndexed Series: each tuple
# supplies one label per index level.
values_by_key = {
    ('a', 1): 100,
    ('a', 2): 200,
    ('b', 1): 300,
    ('b', 2): 400,
    ('c', 1): 500,
    ('c', 2): 600,
}
df = pd.Series(values_by_key)
df

a  1    100
   2    200
b  1    300
   2    400
c  1    500
   2    600
dtype: int64

In [20]:

# Row index: every (letter, number) combination -> 2 * 2 = 4 rows.
idx = pd.MultiIndex.from_product([['a', 'b'], [1, 2]],
                                 names=['letter', 'number'])

# Column index: every (xyz, number) combination -> 3 * 2 = 6 columns.
cols = pd.MultiIndex.from_product([['x', 'y', 'z'], [1, 2]],
                                  names=['xyz', 'number'])

# 4 x 6 grid of random integers drawn from [0, 22). randint already returns
# integers, so no rounding step is needed. The draw is unseeded, so the
# values differ on every run.
data = np.random.randint(0, 22, (4, 6))

df = pd.DataFrame(data, index=idx, columns=cols)

df

Unnamed: 0_level_0,xyz,x,x,y,y,z,z
Unnamed: 0_level_1,number,1,2,1,2,1,2
letter,number,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
a,1,15,17,17,20,20,8
a,2,4,6,21,7,21,21
b,1,11,21,0,12,8,20
b,2,1,4,0,13,4,19


In [21]:
# A full (xyz, number) column key, written as an explicit tuple, picks one column.
df[('x', 2)]

letter  number
a       1         17
        2          6
b       1         21
        2          4
Name: (x, 2), dtype: int32

In [22]:
# Rows under letter 'a'; the 'letter' level is dropped from the result.
df.xs('a', level='letter')

xyz,x,x,y,y,z,z
number,1,2,1,2,1,2
number,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
1,15,17,17,20,20,8
2,4,6,21,7,21,21


In [23]:
# All rows, every column under the outer column label 'y'.
df['y']

Unnamed: 0_level_0,number,1,2
letter,number,Unnamed: 2_level_1,Unnamed: 3_level_1
a,1,17,20
a,2,21,7
b,1,0,12
b,2,0,13


In [24]:
# All rows of the single column addressed by the full (xyz, number) key.
column_key = ('y', 2)
df.loc[:, column_key]

letter  number
a       1         20
        2          7
b       1         12
        2         13
Name: (y, 2), dtype: int32

In [30]:
data = ['ahmed', 'ali', 'mazen', 'ayhaga']

# The `.str` accessor applies string methods element-wise across the Series;
# here each name gets its first letter upper-cased.
capitalized = pd.Series(data).str.capitalize()
capitalized

0     Ahmed
1       Ali
2     Mazen
3    Ayhaga
dtype: object

In [35]:
# Free-form date text is parsed into a Timestamp.
raw_text = '25th march 2002'
pd.to_datetime(raw_text)

Timestamp('2002-03-25 00:00:00')

In [42]:
# '4-1-2020' is ambiguous (1 April vs 4 January). Spell out the month-first
# layout so the result does not depend on pandas' format inference.
date = pd.to_datetime('4-1-2020', format='%m-%d-%Y')
date

Timestamp('2020-04-01 00:00:00')

In [45]:
# Print each calendar/clock component of the timestamp on its own line.
for component in ('day', 'month', 'year', 'hour', 'minute', 'second'):
    print(getattr(date, component))

1
4
2020
0
0
0


In [47]:
# Same date rendered day-first with two different separators.
for pattern in ('%d/%m/%Y', '%d-%m-%Y'):
    print(date.strftime(pattern))

01/04/2020
01-04-2020


In [48]:
# Weekday name, month name, and number of days in that month, one per line.
print(date.day_name(), date.month_name(), date.days_in_month, sep='\n')

Wednesday
April
30


In [61]:
# Explicit month-first format: '4-1-2020' would otherwise be ambiguous
# between 1 April and 4 January.
d1 = pd.to_datetime('4-1-2020', format='%m-%d-%Y')

# Two ways to move ten days forward; both give the same timestamp here.
d2 = d1 + pd.DateOffset(days=10)   # calendar-aware offset
d3 = d1 + pd.Timedelta(days=10)    # fixed-length duration

print(d1)
print(d2)
print(d3)

2020-04-01 00:00:00
2020-04-11 00:00:00
2020-04-11 00:00:00


In [63]:
# Adding a DateOffset moves the date forward: this is 10 months AFTER d1
# (the recorded output shows 2021-02-01).
d2 = d1 + pd.DateOffset(months= 10) # 10 months after d1
print(d2)

2021-02-01 00:00:00


In [65]:
# Explicit month-first format: '4-1-2020' would otherwise be ambiguous
# between 1 April and 4 January.
d1 = pd.to_datetime('4-1-2020', format='%m-%d-%Y')

# Adding day offsets 0..9 to a single timestamp broadcasts into a
# DatetimeIndex of 10 consecutive days that starts at d1 itself.
d3 = d1 + pd.to_timedelta(np.arange(10), unit='D')
print(d1)
print(d3)

2020-04-01 00:00:00
DatetimeIndex(['2020-04-01', '2020-04-02', '2020-04-03', '2020-04-04',
               '2020-04-05', '2020-04-06', '2020-04-07', '2020-04-08',
               '2020-04-09', '2020-04-10'],
              dtype='datetime64[ns]', freq=None)


In [76]:
# ISO date strings converted to real timestamps by requesting the
# datetime64[ns] dtype when the Series is built.
dates = [
    '2020-01-01',
    '2020-04-02',
    '2020-02-03',
]
d1 = pd.Series(data=dates, dtype='datetime64[ns]')
d1

0   2020-01-01
1   2020-04-02
2   2020-02-03
dtype: datetime64[ns]

In [77]:
# Use the dates as the index this time; the values are just 0, 1, 2.
dates = pd.DatetimeIndex([
    '2020-01-01',
    '2020-04-02',
    '2020-02-03',
])

d1 = pd.Series(data=np.arange(3), index=dates)
d1

2020-01-01    0
2020-04-02    1
2020-02-03    2
dtype: int32

In [82]:
# Daily stamps (the default frequency) from 1 Jan 2020 through 29 Feb 2020.
d = pd.date_range(start='2020-01-01', end='2020-02-29')

d

DatetimeIndex(['2020-01-01', '2020-01-02', '2020-01-03', '2020-01-04',
               '2020-01-05', '2020-01-06', '2020-01-07', '2020-01-08',
               '2020-01-09', '2020-01-10', '2020-01-11', '2020-01-12',
               '2020-01-13', '2020-01-14', '2020-01-15', '2020-01-16',
               '2020-01-17', '2020-01-18', '2020-01-19', '2020-01-20',
               '2020-01-21', '2020-01-22', '2020-01-23', '2020-01-24',
               '2020-01-25', '2020-01-26', '2020-01-27', '2020-01-28',
               '2020-01-29', '2020-01-30', '2020-01-31', '2020-02-01',
               '2020-02-02', '2020-02-03', '2020-02-04', '2020-02-05',
               '2020-02-06', '2020-02-07', '2020-02-08', '2020-02-09',
               '2020-02-10', '2020-02-11', '2020-02-12', '2020-02-13',
               '2020-02-14', '2020-02-15', '2020-02-16', '2020-02-17',
               '2020-02-18', '2020-02-19', '2020-02-20', '2020-02-21',
               '2020-02-22', '2020-02-23', '2020-02-24', '2020-02-25',
      

In [83]:
# Ten daily stamps counted forward from the start date.
d = pd.date_range(start='2020-01-01', periods=10)
d

DatetimeIndex(['2020-01-01', '2020-01-02', '2020-01-03', '2020-01-04',
               '2020-01-05', '2020-01-06', '2020-01-07', '2020-01-08',
               '2020-01-09', '2020-01-10'],
              dtype='datetime64[ns]', freq='D')

In [85]:
# Ten stamps spaced two days apart.
d = pd.date_range(
    start='2020-01-01',
    periods=10,
    freq='2D',
)
d

DatetimeIndex(['2020-01-01', '2020-01-03', '2020-01-05', '2020-01-07',
               '2020-01-09', '2020-01-11', '2020-01-13', '2020-01-15',
               '2020-01-17', '2020-01-19'],
              dtype='datetime64[ns]', freq='2D')

In [86]:
# Ten stamps one hour apart. The lower-case 'h' alias is used because the
# upper-case 'H' spelling is deprecated in pandas 2.2.
d = pd.date_range('2020-01-01', periods=10, freq='h')
d

DatetimeIndex(['2020-01-01 00:00:00', '2020-01-01 01:00:00',
               '2020-01-01 02:00:00', '2020-01-01 03:00:00',
               '2020-01-01 04:00:00', '2020-01-01 05:00:00',
               '2020-01-01 06:00:00', '2020-01-01 07:00:00',
               '2020-01-01 08:00:00', '2020-01-01 09:00:00'],
              dtype='datetime64[ns]', freq='H')

In [87]:
# Ten month-end stamps starting with January 2020. The string alias for
# month-end changed from 'M' to 'ME' in pandas 2.2, so the offset object is
# passed directly; it behaves the same on every pandas version.
d = pd.date_range('2020-01-01', periods=10, freq=pd.offsets.MonthEnd())
d

DatetimeIndex(['2020-01-31', '2020-02-29', '2020-03-31', '2020-04-30',
               '2020-05-31', '2020-06-30', '2020-07-31', '2020-08-31',
               '2020-09-30', '2020-10-31'],
              dtype='datetime64[ns]', freq='M')

In [89]:
# Ten durations one hour apart, starting at zero. The lower-case 'h' alias
# is used because the upper-case 'H' spelling is deprecated in pandas 2.2.
d = pd.timedelta_range(0, periods=10, freq='h')
d

TimedeltaIndex(['0 days 00:00:00', '0 days 01:00:00', '0 days 02:00:00',
                '0 days 03:00:00', '0 days 04:00:00', '0 days 05:00:00',
                '0 days 06:00:00', '0 days 07:00:00', '0 days 08:00:00',
                '0 days 09:00:00'],
               dtype='timedelta64[ns]', freq='H')

In [90]:
# Ten durations spaced 2 hours, 30 minutes and 40 seconds apart (9040 s per
# step). The aliases 'h', 'min' and 's' replace 'H', 'T' and 'S', which are
# deprecated in pandas 2.2.
d = pd.timedelta_range(0, periods=10, freq='2h30min40s')
d

TimedeltaIndex(['0 days 00:00:00', '0 days 02:30:40', '0 days 05:01:20',
                '0 days 07:32:00', '0 days 10:02:40', '0 days 12:33:20',
                '0 days 15:04:00', '0 days 17:34:40', '0 days 20:05:20',
                '0 days 22:36:00'],
               dtype='timedelta64[ns]', freq='9040S')