In [2]:
import os
from collections import namedtuple
from dataclasses import make_dataclass
from pathlib import Path

import numpy as np
import pandas as pd

# Series from an ndarray with an explicit label index.
# A seeded Generator is used so the values are the same on every
# Restart Kernel -> Run All.
rng = np.random.default_rng(0)
s = pd.Series(rng.standard_normal(5), index=["a", "b", "c", "d", "e"])
s

a    0.141852
b   -0.480461
c   -1.155063
d    0.616478
e    0.112359
dtype: float64

In [3]:
# Instantiating a Series from a dict: the keys become the index labels
# and keep their insertion order.
d = dict(b=1, a=0, c=2)
pd.Series(data=d)

b    1
a    0
c    2
dtype: int64

In [4]:
# Series from a scalar value: the scalar is repeated for every index label.
pd.Series(data=5.0, index=list("abcde"))

a    5.0
b    5.0
c    5.0
d    5.0
e    5.0
dtype: float64

In [5]:
# Series `name` attribute: set through the constructor, read back via `.name`.
# The data comes from a seeded Generator so re-running the cell gives the
# same values.
rng = np.random.default_rng(0)
s = pd.Series(rng.standard_normal(5), name="something")
s.name

'something'

In [6]:
# Series.rename with a scalar gives the result a new name.
# Note that s and s2 refer to different objects.
s2 = s.rename("different")
s2.name

'different'

In [7]:
# DataFrame from a dict of Series (or dicts): the row index is the union of
# the individual Series indexes, and labels missing from a Series show as NaN.
d = dict(
    one=pd.Series([1.0, 2.0, 3.0], index=list("abc")),
    two=pd.Series([1.0, 2.0, 3.0, 4.0], index=list("abcd")),
)
df = pd.DataFrame(data=d)
df

Unnamed: 0,one,two
a,1.0,1.0
b,2.0,2.0
c,3.0,3.0
d,,4.0


In [8]:
# When explicit columns are passed together with a dict of data, the passed
# columns override the dict keys: "one" is left out and the unknown "three"
# column comes back empty. `index` picks and orders the rows.
pd.DataFrame(data=d, columns=["two", "three"], index=["d", "b", "a"])

Unnamed: 0,two,three
d,4.0,
b,2.0,
a,1.0,


In [9]:
# From a dict of ndarrays / lists: all sequences must have the same length;
# without an explicit index the rows are numbered 0..n-1.
d = dict(
    one=[1.0, 2.0, 3.0, 4.0],
    two=[4.0, 3.0, 2.0, 1.0],
)
pd.DataFrame(data=d)

Unnamed: 0,one,two
0,1.0,4.0
1,2.0,3.0
2,3.0,2.0
3,4.0,1.0


In [10]:
# Index overriding: supply row labels explicitly instead of the default 0..n-1.
pd.DataFrame(data=d, index=list("abcd"))

Unnamed: 0,one,two
a,1.0,4.0
b,2.0,3.0
c,3.0,2.0
d,4.0,1.0


In [11]:
# From a structured or record array: the field names become the column labels.
data = np.zeros(
    2,
    dtype=[
        ("A", "i4"),
        ("B", "f4"),
        ("C", "S10"),
    ],
)
data[:] = [
    (1, 2.0, "Hello"),
    (2, 3.0, "World"),
]
pd.DataFrame(data=data)

Unnamed: 0,A,B,C
0,1,2.0,b'Hello'
1,2,3.0,b'World'


In [12]:
# From a list of dicts: each dict is one row; a key absent from a row is
# filled with NaN in that row.
data2 = [
    dict(a=1, b=2),
    dict(a=5, b=10, c=20),
]
pd.DataFrame(data=data2)

Unnamed: 0,a,b,c
0,1,2,
1,5,10,20.0


In [13]:
# From a dict of tuples: tuple keys of the outer dict build a two-level column
# index, tuple keys of the inner dicts build a two-level row index.
pd.DataFrame(
    data={
        ("a", "b"): {("A", "B"): 1, ("A", "C"): 2},
        ("a", "a"): {("A", "C"): 3, ("A", "B"): 4},
        ("a", "c"): {("A", "B"): 5, ("A", "C"): 6},
        ("b", "a"): {("A", "C"): 7, ("A", "B"): 8},
        ("b", "b"): {("A", "D"): 9, ("A", "B"): 10},
    },
)

Unnamed: 0_level_0,Unnamed: 1_level_0,a,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,b,a,c,a,b
A,B,1.0,4.0,5.0,8.0,10.0
A,C,2.0,3.0,6.0,7.0,
A,D,,,,,9.0


In [14]:
# From a Series: the frame keeps the Series index and gets a single column
# labelled with the Series name.
ser = pd.Series(
    range(3),
    index=["a", "b", "c"],
    name="ser",
)
pd.DataFrame(data=ser)

Unnamed: 0,ser
a,0
b,1
c,2


In [15]:
# From a list of namedtuples: the namedtuple field names become the column
# labels; a plain tuple of the same length is accepted as a further row.
# (`namedtuple` is imported in the notebook's top import block.)
Point = namedtuple("Point", ["x", "y"])
pd.DataFrame([Point(0, 0), Point(0, 3), (2, 3)])

Unnamed: 0,x,y
0,0,0
1,0,3
2,2,3


In [16]:
# From a list of dataclasses.
# Data classes, as introduced in PEP 557, can be passed to the DataFrame
# constructor. Passing a list of dataclasses is equivalent to passing a list
# of dictionaries.
# Be aware that every value in the list should be a dataclass; mixing types
# in the list results in a TypeError.
# NOTE: this rebinds `Point`, which the namedtuple example bound earlier.
# (`make_dataclass` is imported in the notebook's top import block.)
Point = make_dataclass("Point", [("x", int), ("y", int)])
pd.DataFrame([Point(0, 0), Point(0, 3), Point(2, 3)])

Unnamed: 0,x,y
0,0,0
1,0,3
2,2,3


In [17]:
# Alternate constructor: DataFrame.from_dict treats the dict keys as column
# labels by default.
pd.DataFrame.from_dict({"A": [1, 2, 3], "B": [4, 5, 6]})

Unnamed: 0,A,B
0,1,4
1,2,5
2,3,6


In [18]:
# Alternate constructor: DataFrame.from_records builds the frame from the
# structured array and uses its "C" field as the row index.
pd.DataFrame.from_records(data=data, index="C")

Unnamed: 0_level_0,A,B
C,Unnamed: 1_level_1,Unnamed: 2_level_1
b'Hello',1,2.0
b'World',2,3.0


In [19]:
# Column selection works like a dict lookup: indexing the frame by a column
# label returns that column as a Series.
df["two"]

a    1.0
b    2.0
c    3.0
d    4.0
Name: two, dtype: float64

In [20]:
# Column addition: new columns are assigned like dict entries and can be
# derived from existing columns by arithmetic or comparison.
df["three"] = df["one"].mul(df["two"])
df["flag"] = df["one"].gt(2)
df

Unnamed: 0,one,two,three,flag
a,1.0,1.0,1.0,False
b,2.0,2.0,4.0,False
c,3.0,3.0,9.0,True
d,,4.0,,False


In [21]:
# Popping or deleting columns, dict-style: pop() removes a column and returns
# it, del removes a column and discards it.
three = df.pop("three")
del df["two"]
df

Unnamed: 0,one,flag
a,1.0,False
b,2.0,False
c,3.0,True
d,,False


In [22]:
# Inserting a scalar value will naturally be propagated to fill the column:
# every row receives the same value.
# NOTE(review): the column label reads "scaler"; "scalar" is presumably what
# was meant, but the label is a runtime string and is left unchanged here.
df["scaler propagation"] = "propagated"
df

Unnamed: 0,one,flag,scaler propagation
a,1.0,False,propagated
b,2.0,False,propagated
c,3.0,True,propagated
d,,False,propagated


In [23]:
# Load the claims extract and preview its first rows.
# The file location can be overridden through the CLAIMS_CSV environment
# variable so the notebook is not tied to one machine; the original local path
# stays as the default.
# The previous bare `claims.assign()` call was dropped: called with no
# arguments it returned an unused copy and had no effect.
# NOTE(review): the preview shows personal data (claimant names, ID numbers);
# clear this cell's output before sharing or committing the notebook.
CLAIMS_CSV = Path(os.environ.get("CLAIMS_CSV", r"C:\Users\Lenovo\Downloads\claim1.csv"))
claims = pd.read_csv(CLAIMS_CSV)
claims.head()

Unnamed: 0,PAYROLL,CLAIMANT'S NAME,ID NUMBER,DATE OF LOSS,INTIMATION DATE,BRITAM CLAIM NO.,NATURE OF LOSS,SCHEME,NATURE OF INJURY,SERVICE,...,Documentation TAT,DV ISSUED DATE,DV ISSUE TAT,DV RECEIVED DATE,date paid/declined,DATE REMITTED,TAT FINANCE,payment requisition no.,STATION,Column1
0,29302402.0,JOSEPH KAHUGU WAMBUGU,29302402.0,22/06/2023,04-Oct-23,552/092/5/001136/2023/10,DEATH,GPA,ASSAULT,KENYA POLICE,...,10/04/2023,04-Oct-23,0.0,11-Oct-23,11-Oct-23,18-Oct-23,7.0,"202012523202310, 202012519202310, 202012517202...",,
1,29531579.0,ANTONY ALFRED MANYASI,29531579.0,05/09/2023,20-Apr-23,552/092/5/000916/2023/06,INJURY,GPA,RTA,KENYA POLICE,...,153,,400.42,,,,,,NAIROBI,
2,10846010.0,JOSEPH MACHARIA MWANGI,10846010.0,17/06/2023,28-Jul-23,552/092/5/001041/2023/08,DEATH,GPA,FALL,KENYA PRISONS,...,0,,454.42,04-Aug-23,04-Aug-23,05-Aug-23,1.0,2.02E+14,,
3,,TIMOTHY MUGENDI RUCHAH,,12/01/2023,08-Jun-23,552/110/5/004133/2023/11,INJURY,WIBA,BRAIN INJURY,ADMINISTRATION POLICE,...,14,26-Jun-23,4.0,,,,,,NAIROBI,
4,30122505.0,SILAS MURIUKI KINOTI,30122505.0,17/02/2023,27-Apr-23,552/092/5/000999/2023/07,INJURY,GPA,BRAIN INJURY,KENYA POLICE,...,85,,461.42,,,,,,NAIROBI,


In [24]:
# assign() evaluates its keyword arguments in order, so a later column (D)
# may refer to one created earlier in the same call (C). The original frame
# is not modified; a new frame is returned.
dfa = pd.DataFrame(dict(A=[1, 2, 3], B=[4, 5, 6]))
dfa.assign(
    C=lambda frame: frame["A"] + frame["B"],
    D=lambda frame: frame["A"] + frame["C"],
)

Unnamed: 0,A,B,C,D
0,1,4,5,6
1,2,5,7,9
2,3,6,9,12


In [25]:
# Data alignment and arithmetic: operations between DataFrames align on both
# row and column labels; positions present in only one operand come out NaN
# (here column "D" and rows 7-9).
# A seeded Generator keeps the values reproducible across kernel restarts.
rng = np.random.default_rng(0)
df = pd.DataFrame(rng.standard_normal((10, 4)), columns=["A", "B", "C", "D"])
df2 = pd.DataFrame(rng.standard_normal((7, 3)), columns=["A", "B", "C"])
df + df2

Unnamed: 0,A,B,C,D
0,1.092525,-0.079718,-0.783852,
1,-1.421064,0.67099,1.457734,
2,0.196921,0.459533,-1.402146,
3,0.322272,-0.010171,0.323456,
4,1.861684,1.599329,-1.405427,
5,-0.699761,0.9212,-1.502602,
6,2.056315,2.796435,-0.472262,
7,,,,
8,,,,
9,,,,


In [27]:
# To transpose, use the T attribute or DataFrame.transpose(), as with an
# ndarray: rows become columns and columns become rows. Only the first five
# rows are transposed to keep the output small.
df.iloc[:5].T

Unnamed: 0,0,1,2,3,4
A,0.37305,-2.219523,0.757649,0.51112,0.298717
B,0.399486,0.539548,-0.145287,-0.0954,0.563987
C,-1.276466,0.35201,-0.156209,0.31589,-1.207597
D,-0.744515,-0.306572,-1.006615,0.803615,0.826904
