In [1]:
import pandas as pd
import numpy as np
from IPython.display import display, HTML

pd.__version__

'1.4.3'

# Series

In [2]:
data_series = pd.Series([1, 2, 3], index=["a", "b", "c"], name="Sample")
data_series

a    1
b    2
c    3
Name: Sample, dtype: int64

In [3]:
data_series[1] == data_series["b"]

True

In [4]:
data_series.values, type(data_series.values)

(array([1, 2, 3], dtype=int64), numpy.ndarray)

In [5]:
data_series.to_list() # It Also Has Other Methods
data_series.to_dict()
data_series.to_json()

'{"a":1,"b":2,"c":3}'

In [6]:
data_series.index, type(data_series.index)

(Index(['a', 'b', 'c'], dtype='object'), pandas.core.indexes.base.Index)

# DataFrame

## Making DataFrames

In [7]:
 np.arange(1200).reshape((400, 3)).shape

(400, 3)

In [8]:
df1 = pd.DataFrame(
    data    = np.arange(1200).reshape((400, 3)),
    columns = ["COL_1", "COL_2", "COL_3"],
    index   = [x + 1 for x in range(400)]
)
df1

Unnamed: 0,COL_1,COL_2,COL_3
1,0,1,2
2,3,4,5
3,6,7,8
4,9,10,11
5,12,13,14
...,...,...,...
396,1185,1186,1187
397,1188,1189,1190
398,1191,1192,1193
399,1194,1195,1196


In [9]:
data = {
    "ID": ["1233", "1234", "1250", "123468", "456897", "987456"],
    "Name":  ["Mahyar", "Sajjad", "Ali", "Poorya", "Amin", "Bagher"],
    "Family": ["Riazati", "Yazdan Parast", "Babaii", "ZamanVaziri", "Anvari", "Tabrizi"],
    "Age": [23, 24, 20, 22, 23, 28],
    "Birth Place": ["Tehran", "Isfahan", "Tehran", "Tehran", "Tehran", "Tabriz"]
}
df = pd.DataFrame(data=data)
df

Unnamed: 0,ID,Name,Family,Age,Birth Place
0,1233,Mahyar,Riazati,23,Tehran
1,1234,Sajjad,Yazdan Parast,24,Isfahan
2,1250,Ali,Babaii,20,Tehran
3,123468,Poorya,ZamanVaziri,22,Tehran
4,456897,Amin,Anvari,23,Tehran
5,987456,Bagher,Tabrizi,28,Tabriz


## Indexing & Selection

### By Columns

In [10]:
df["Birth Place"].value_counts()

Tehran     4
Isfahan    1
Tabriz     1
Name: Birth Place, dtype: int64

In [11]:
df[["ID", "Age", "Birth Place"]]

Unnamed: 0,ID,Age,Birth Place
0,1233,23,Tehran
1,1234,24,Isfahan
2,1250,20,Tehran
3,123468,22,Tehran
4,456897,23,Tehran
5,987456,28,Tabriz


### By Methods

In [12]:
df1.head(10)

Unnamed: 0,COL_1,COL_2,COL_3
1,0,1,2
2,3,4,5
3,6,7,8
4,9,10,11
5,12,13,14
6,15,16,17
7,18,19,20
8,21,22,23
9,24,25,26
10,27,28,29


In [13]:
df1.tail(2)

Unnamed: 0,COL_1,COL_2,COL_3
399,1194,1195,1196
400,1197,1198,1199


In [14]:
df1.head(199).tail(1)

Unnamed: 0,COL_1,COL_2,COL_3
199,594,595,596


## By Row Number

In [15]:
row_199 = df1.iloc[198]
row_199 # Just For Visualizing

COL_1    594
COL_2    595
COL_3    596
Name: 199, dtype: int32

In [16]:
df1.iloc[50:60]

Unnamed: 0,COL_1,COL_2,COL_3
51,150,151,152
52,153,154,155
53,156,157,158
54,159,160,161
55,162,163,164
56,165,166,167
57,168,169,170
58,171,172,173
59,174,175,176
60,177,178,179


### Select Column

In [17]:
df1.iloc[50:60, :2]

Unnamed: 0,COL_1,COL_2
51,150,151
52,153,154
53,156,157
54,159,160
55,162,163
56,165,166
57,168,169
58,171,172
59,174,175
60,177,178


### By Index Name

In [18]:
df = df.set_index("ID")
df

Unnamed: 0_level_0,Name,Family,Age,Birth Place
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1233,Mahyar,Riazati,23,Tehran
1234,Sajjad,Yazdan Parast,24,Isfahan
1250,Ali,Babaii,20,Tehran
123468,Poorya,ZamanVaziri,22,Tehran
456897,Amin,Anvari,23,Tehran
987456,Bagher,Tabrizi,28,Tabriz


In [19]:
df.loc["1233"]

Name            Mahyar
Family         Riazati
Age                 23
Birth Place     Tehran
Name: 1233, dtype: object

In [20]:
df.loc["1233": "1250"]

Unnamed: 0_level_0,Name,Family,Age,Birth Place
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1233,Mahyar,Riazati,23,Tehran
1234,Sajjad,Yazdan Parast,24,Isfahan
1250,Ali,Babaii,20,Tehran


### Select Column

In [21]:
df.loc["1233": "1250", ["Age", "Birth Place"]]

Unnamed: 0_level_0,Age,Birth Place
ID,Unnamed: 1_level_1,Unnamed: 2_level_1
1233,23,Tehran
1234,24,Isfahan
1250,20,Tehran


## Boolean & Conditional

In [22]:
cond_birth_place = df["Birth Place"] == "Tehran"
cond_age_limit = df.Age > 20
df[cond_age_limit & cond_birth_place]

Unnamed: 0_level_0,Name,Family,Age,Birth Place
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1233,Mahyar,Riazati,23,Tehran
123468,Poorya,ZamanVaziri,22,Tehran
456897,Amin,Anvari,23,Tehran


# Dataframe Values & Properties

In [23]:
df.values, type(df.values)

(array([['Mahyar', 'Riazati', 23, 'Tehran'],
        ['Sajjad', 'Yazdan Parast', 24, 'Isfahan'],
        ['Ali', 'Babaii', 20, 'Tehran'],
        ['Poorya', 'ZamanVaziri', 22, 'Tehran'],
        ['Amin', 'Anvari', 23, 'Tehran'],
        ['Bagher', 'Tabrizi', 28, 'Tabriz']], dtype=object),
 numpy.ndarray)

In [24]:
df.Age.values

array([23, 24, 20, 22, 23, 28], dtype=int64)

In [25]:
df.index

Index(['1233', '1234', '1250', '123468', '456897', '987456'], dtype='object', name='ID')

# Loading Datasets

## CSV

In [28]:
df = pd.read_csv("./my_df.csv")
df

Unnamed: 0,ID,Name,Family,Age,Birth Place
0,1233,Mahyar,Riazati,23,Tehran
1,1234,Sajjad,Yazdan Parast,24,No Place
2,1250,Ali,Babaii,20,Tehran
3,123468,Poorya,ZamanVaziri,22,Tehran
4,456897,Amin,Anvari,No Age,Tehran
5,987456,No Name,Tabrizi,28,Tabriz


## HTML

In [29]:
import requests
URL = "http://www.tsetmc.com/Loader.aspx?ParTree=15"
response = requests.get(URL)
response, dir(response)[37:]

(<Response [200]>,
 ['apparent_encoding',
  'close',
  'connection',
  'content',
  'cookies',
  'elapsed',
  'encoding',
  'headers',
  'history',
  'is_permanent_redirect',
  'is_redirect',
  'iter_content',
  'iter_lines',
  'json',
  'links',
  'next',
  'ok',
  'raise_for_status',
  'raw',
  'reason',
  'request',
  'status_code',
  'text',
  'url'])

In [30]:
tables = pd.read_html(response.text)
type(tables), len(tables)

(list, 56)

In [31]:
for table in tables:
    display(table)

Unnamed: 0,0,1
0,وضعیت بازار,بسته
1,شاخص کل,"1,451,404.12 (8095.85)"
2,شاخص كل (هم وزن),"399,078.56 454.83"
3,ارزش بازار,"54,824,471.934 B"
4,اطلاعات قیمت,01/5/26 17:51:57
5,تعداد معاملات,403439
6,ارزش معاملات,"51,771.906 B"
7,حجم معاملات,8.992 B


Unnamed: 0,0,1
0,01/5/26 15:10,اطلاعيه درخصوص عدم تاييد بخشي از معاملات (سپاه...
1,01/5/26 15:10,اطلاعيه درخصوص عدم تاييد بخشي از معاملات (پارس...
2,01/5/26 13:43,توقف نماد (ثشرق1) به علت افشاي اطلاعات با اهمي...
3,01/5/26 10:53,توقف نماد معاملاتي(ركيشح)
4,01/5/26 10:51,توقف نماد معاملاتي(تاپيكو)
5,01/5/26 10:49,توقف نماد معاملاتي(تاصيكو)
6,01/5/26 10:46,توقف نماد معاملاتي(فايرا)
7,01/5/26 10:41,بازگشايي نماد معاملاتي(چكارن)


Unnamed: 0,نماد,قیمت پایانی,قیمت پایانی.1,آخرین معامله,آخرین معامله.1,کمترین,بیشترین,تعداد,حجم,ارزش
0,خساپا - سايپا,2170,4.53,2179,4.96,2107,2179,26599,1.640 B,"3,558.359 B"
1,خودرو - ايران‌ خودرو,2220,3.21,2240,4.14,2161,2257,18859,800.126 M,"1,776.379 B"
2,شپنا - پالايش نفت اصفهان,5940,(2.62),5910,(3.11),5830,6060,13376,193.162 M,"1,147.679 B"
3,خگستر - گسترش‌سرمايه‌گذاري‌ايران‌خودرو,3527,1.73,3533,1.9,3430,3639,10830,309.738 M,"1,092.458 B"
4,شبندر - پالايش نفت بندرعباس,7130,(3.65),7130,(3.65),7020,7330,10673,130.578 M,930.597 B
5,وتجارت - بانك تجارت,1799,2.57,1820,3.76,1750,1837,8973,368.340 M,662.798 B
6,شتران - پالايش نفت تهران,4917,(2.52),4880,(3.25),4850,5022,7885,123.605 M,607.722 B


Unnamed: 0,شاخص,انتشار,مقدار,تغییر,درصد,بیشترین,کمترین
0,شاخص كل,18:18,1451404.12,(8095.85),(0.55),1460755.59,1451395.53
1,شاخص قيمت(وزني-ارزشي),18:18,336914.28,(1879.29),(0.55),339085.05,336912.3
2,شاخص كل (هم وزن),18:18,399078.56,454.83,0.11,399679.62,399075.49
3,شاخص قيمت (هم وزن),18:18,239024.56,272.42,0.11,239384.65,239022.82
4,شاخص آزاد شناور,18:18,1848025.08,(8771.51),(0.47),1858754.63,1848021.2
5,شاخص بازار اول,18:18,1094975.56,(5776.75),(0.52),1102524.23,1094963.87
6,شاخص بازار دوم,18:18,2824855.65,(16748.41),(0.59),2842182.98,2824854.37


Unnamed: 0,نماد,قیمت پایانی,تاثیر
0,فارس,8210,(1295.15)
1,شبندر,7130,(959.72)
2,شپديس,173940,(873.14)
3,شپنا,5940,(804.34)
4,كگل,11030,(687.93)
5,شتران,4917,(571.24)
6,فولاد,5560,(560.92)


Unnamed: 0,گروه,ارزش بازار,تعداد معاملات,حجم معاملات,ارزش معاملات
0,34-خودرو,"2,088,382.155 B",82822,3.317 B,"8,747.536 B"
1,23-فراورده نفتي,"3,848,824.955 B",35401,481.698 M,"2,953.730 B"
2,44-شيميايي,"13,213,298.853 B",33178,285.466 M,"2,306.663 B"
3,57-بانكها,"3,519,622.769 B",25782,909.708 M,"2,046.209 B"
4,27-فلزات اساسي,"9,598,784.937 B",26783,370.289 M,"1,828.398 B"
5,56-سرمايه گذاريها,"3,262,907.120 B",12828,223.949 M,"1,088.092 B"
6,73-اطلاعات و ارتباطات,"119,075.628 B",11095,272.733 M,991.389 B


Unnamed: 0,تاریخ,ارزش بازار
0,01/5/25,"55,135,691.460 B"
1,01/5/24,"55,297,933.627 B"
2,01/5/23,"55,295,692.754 B"
3,01/5/22,"55,426,135.487 B"
4,01/5/19,"54,720,023.320 B"
5,01/5/18,"54,850,700.975 B"
6,01/5/15,"56,978,727.475 B"


Unnamed: 0,شاخص,انتشار,مقدار,تغییر,درصد,بیشترین,کمترین,Unnamed: 7
0,73-اطلاعات و ارتباطات,18:18,1000.16,43.43,4.54,1000.16,963.21,
1,19-محصولات چرمي,18:18,17917.16,710.76,4.13,17917.16,17216.79,
2,32-وسايل ارتباطي,18:18,43114.81,1398.98,3.35,43114.81,41471.99,
3,34-خودرو,18:18,236320.79,5880.38,2.55,236405.98,235426.92,
4,23-فراورده نفتي,18:18,5796933.91,(150615.75),(2.53),5885223.36,5796933.91,
5,14-ساير معادن,18:18,124577.22,(3889.59),(3.03),128466.77,124577.22,
6,10-ذغال سنگ,18:18,51675.9,(2575.48),(4.75),54057.85,51675.9,


Unnamed: 0,نماد,قیمت پایانی,قیمت پایانی.1
0,بورس,5052,7.15
1,دانا,1013,5.96
2,وتوصا,8060,5.22
3,اتكام,3052,4.95
4,حفارس,10580,4.86
5,تكشا,25160,4.83
6,بالبر,39380,4.82


Unnamed: 0,نماد,قیمت پایانی,قیمت پایانی.1
0,فنوال,24240,(4.98)
1,كطبس,18670,(4.74)
2,جم پيلن,118930,(4.6)
3,وصنعت,1884,(4.32)
4,پدرخش,5800,(4.13)
5,ثنوسا,3253,(3.98)
6,سصفها,45050,(3.88)


Unnamed: 0,نماد,آخرین معامله,آخرین معامله.1
0,بكاب,25470,4.64
1,لابسا,6580,3.79
2,واعتبار,2719,3.23
3,پدرخش,5950,2.59
4,لوتوس,6930,2.51
5,كفرا,7360,2.51
6,قشهد,13790,2.38


Unnamed: 0,نماد,آخرین معامله,آخرین معامله.1
0,نمرينو,69680,(3.65)
1,اميد,5650,(3.09)
2,فخاس,18040,(2.96)
3,كنور,15810,(2.89)
4,ولملت,3615,(2.8)
5,ثامان,12120,(2.49)
6,داسوه,31480,(2.48)


Unnamed: 0,نماد,قیمت پایانی,قیمت پایانی.1
0,دلرح,17060,1.31
1,داروح,11650,1.22
2,وسپهح,2852,(0.83)
3,پرديسح,1385,(2.81)
4,ركيشح,1803,(6.14)


Unnamed: 0,نماد,قیمت پایانی,قیمت پایانی.1


Unnamed: 0,نماد,قیمت,ارزش
0,سپر4,15216,149.972 B
1,فردا4,13769,"1,819.586 B"
2,كارين4,10151,964.345 B
3,افران4,17229,240.000 B


Unnamed: 0,نماد,قیمت,حجم,ارزش,تعداد
0,افران,17228,110.170 M,"1,898.012 B",1112
1,فردا,13768,99.414 M,"1,368.732 B",198
2,هماي,10152,100.576 M,"1,021.049 B",1012
3,كمند,10062,54.326 M,546.627 B,58
4,پارند,10060,29.856 M,300.349 B,299
5,سپر,15219,14.330 M,218.088 B,144
6,سپاس,10060,20.268 M,203.897 B,4


Unnamed: 0,نماد,قیمت,حجم,ارزش,تعداد
0,هماي,10154,100.102 M,"1,016.434 B",1002
1,فردا,13770,29.896 M,411.666 B,209
2,فيروزا,29548,7.048 M,208.255 B,71
3,انرژي1,33960,4.800 M,163.008 B,48
4,ياقوت,15501,10.050 M,155.785 B,101
5,سپر,15222,9.991 M,152.079 B,100
6,كمند,10064,14.901 M,149.964 B,2


Unnamed: 0,نماد,تاریخ,تعدیل شده,قبل از تعدیل
0,غنوش,1401/5/26,9040,9042
1,حفاري,1401/5/25,3446,3636
2,چكارن,1401/5/24,2163,2178
3,كروي,1401/5/23,18640,21640
4,كترام,1401/5/22,7800,8190
5,وسينا,1401/5/15,1944,1973
6,فولاد,1401/5/12,9470,11170


Unnamed: 0,نماد,تاریخ,سهام جدید,سهام قبلی
0,كبورس,1401/5/29,14.000 B,18.200 B
1,نبورس,1401/5/29,14.000 B,18.200 B
2,كبورس,1401/5/26,18.200 B,14.000 B
3,نبورس,1401/5/26,18.200 B,14.000 B
4,بورس,1401/5/26,18.200 B,14.000 B
...,...,...,...,...
1632,غدشت,1387/10/14,53.453 M,30.000 M
1633,ثاخت,1387/9/23,1.500 B,"1,000.000 M"
1634,غپينو,1387/9/23,210.000 M,70.000 M
1635,وبهمن,1387/9/23,2.420 B,2.200 B


Unnamed: 0,نماد,ارزش بازار,تغییر
0,خودرو,"669,676 B","20,814.269 B"
1,خساپا,"424,258 B","18,377.992 B"
2,وتجارت,"402,843 B","10,076.676 B"
3,وبصادر,"308,272 B","8,767.699 B"
4,وبملت,"844,302 B","8,647.414 B"
5,شفن,"195,604 B","7,428.000 B"
6,بورس,"91,946 B","6,133.400 B"


Unnamed: 0,نماد,تعداد,حجم,ارزش,ارزش بازار
0,خساپا,26599,1.640 B,"3,558.359 B","424,258 B"
1,كمند,6007,281.407 M,"2,831.715 B","201,260 B"
2,افران,5093,147.781 M,"2,546.092 B","68,916 B"
3,صايپا4092,1,2.500 M,"2,375.250 B","7,126 B"
4,ميدكو2,1,125.500 M,"2,215.075 B","2,471,000 B"
5,فخاس2,1,107.410 M,"1,998.900 B","221,459 B"
6,دارا يكم,14927,18.423 M,"1,867.076 B","59,622 B"


Unnamed: 0,نماد,تعداد,حجم,ارزش,ارزش بازار
0,خساپا,26599,1.640 B,"3,558.359 B","424,258 B"
1,خودرو,18859,800.126 M,"1,776.379 B","669,676 B"
2,وتجارت,8973,368.340 M,662.798 B,"402,843 B"
3,شستا,5566,320.642 M,309.270 B,"1,578,753 B"
4,خگستر,10830,309.738 M,"1,092.458 B","139,687 B"
5,كمند,6007,281.407 M,"2,831.715 B","201,260 B"
6,هاي وب,6935,260.408 M,746.191 B,"79,296 B"


Unnamed: 0,نماد,ارزش بازار,تغییر
0,فارس,"4,018,795 B","48,950.000 B"
1,شبندر,"957,858 B","36,272.319 B"
2,شپديس,"1,043,640 B","33,000.000 B"
3,شپنا,"1,128,600 B","30,400.000 B"
4,كگل,"2,206,000 B","26,000.000 B"
5,شتران,"835,890 B","21,590.000 B"
6,فولاد,"2,946,800 B","21,200.000 B"


Unnamed: 0,نماد,علت
0,ايران‌ارقام‌ (مرقام),عدم رعایت الزامات پذیرش (بند 1 ماده 38 دستورال...
1,معدني‌ دماوند (كدما),عدم ارائه صورت‌های مالی 12 ماهه حسابرسی شدۀ شر...
2,تكنوتار (تكنو),مشمول مادۀ 141 قانون تجارتعدم رعایت الزامات پذ...
3,سرمايه‌ گذاري‌ شاهد (ثشاهد),عدم ارائه گزارش فعالیت ماهانه تیر ماه
4,صنايع‌ آذرآب‌ (فاذر),عدم ارائه صورت‌های مالی 12 ماهه حسابرسی شدۀ شر...
5,پارس‌ الكتريك‌ (لپارس),عدم رعایت الزامات پذیرش (بند 1 ماده 38 دستورال...
6,سايپا (خساپا),عدم رعایت الزامات پذیرش (بند 1 ماده 38 دستورال...
7,ايران‌ خودرو (خودرو),عدم رعایت الزامات پذیرش (بند 1 ماده 38 دستورال...
8,لبنيات‌ پاك‌ (غپاك),عدم ارائه صورت‌های مالی 12 ماهه حسابرسی شدۀ شر...
9,بانك‌پارسيان‌ (وپارس),عدم رعایت الزامات پذیرش (بند 1 ماده 38 دستورال...


Unnamed: 0,نماد,علت


Unnamed: 0,نماد,علت
0,س.سهام عدالت استان مازندران (وسمازن),عدم ارائه گزارش تفسیری مدیریت میاندوره ای 6 ما...
1,شركت س استان هرمزگان (وسهرمز),عدم ارائه گزارش فعالیت ماهانه فروردین ماهعدم ا...
2,پارس‌ خودرو (خپارس),3 سال مشمول ماده 141 قانون تجارت
3,شركت س استان فارس (وسفارس),عدم ارائه گزارش تفسیری مدیریت میاندوره ای 6 ما...
4,شركت س استان خراسان شمالي (وسخراش),عدم ارائه گزارش فعالیت ماهانه تیر ماهبررسی وضع...
5,شركت س استان كردستان (وسكرد),بررسی وضعیت شفافیت اطلاعاتی ناشرعدم ارائه صورت...
6,شركت س استان آذربايجان غربي (وساغربي),عدم ارائه صورت های مالی 12 ماهه حسابرسی شدهبرر...
7,شركت س استان خراسان جنوبي (وسخراج),بررسی وضعیت شفافیت اطلاعاتی ناشرعدم ارائه گزار...
8,شركت س استان خوزستان (وسخوز),عدم ارائه صورت های مالی 12 ماهه حسابرسی شدهبرر...
9,شركت س استان همدان (وسهمدا),عدم ارائه گزارش تفسیری مدیریت میاندوره ای 6 ما...


Unnamed: 0,0,1
0,وضعیت بازار,بسته
1,شاخص کل,"19,044.96 (57.46)"
2,ارزش بازار اول و دوم,"10,504,741.432 B"
3,ارزش بازار پایه,"3,388,158.041 B"
4,اطلاعات قیمت,01/5/26 17:39:18
5,تعداد معاملات,163913
6,ارزش معاملات,"162,013.170 B"
7,حجم معاملات,3.055 B


Unnamed: 0,نوع ابزار,تعداد,حجم,ارزش
0,سهام,142438,1.852 B,"8,756.889 B"
1,اوراق مالی اسلامی,1651,26.154 M,"20,760.237 B"
2,عملیات بازار باز,32,130.587 M,"122,448.287 B"
3,سایر,19812,1.046 B,"10,048.212 B"


Unnamed: 0,0,1
0,01/5/26 17:43,تعيين كارگزار ناظر در نماد معاملاتي (پرسپوليس1)
1,01/5/26 17:40,تعيين كارگزار ناظر در نماد معاملاتي (استقلال1)
2,01/5/26 17:13,عرضه عمده سهام كنترلي در نماد معاملاتي (ولراز4)
3,01/5/26 16:40,عدم تأييد بخشي از معاملات در نمادهاي معاملاتي ...
4,01/5/26 16:39,عدم تأييد معاملات در نماد معاملاتي (بتك1)
5,01/5/26 16:39,عدم تأييد معاملات در نماد معاملاتي (شتهران1)
6,01/5/26 16:39,عدم تأييد معاملات در نماد معاملاتي (نبروج1)
7,01/5/26 16:38,عدم تأييد معاملات در نماد معاملاتي (تملي6121)


Unnamed: 0,نماد,قیمت پایانی,قیمت پایانی.1,آخرین معامله,آخرین معامله.1,کمترین,بیشترین,تعداد,حجم,ارزش
0,دي - بانك دي,716,(1.65),715,(1.79),710,740,7641,343.273 M,245.921 B
1,ريشمك - توليد و صادرات ريشمك,17090,(4.36),17580,(1.62),16800,18200,3009,11.526 M,197.031 B
2,زكشت - كشاورزي مكانيزه اصفهان كشت,27600,4.35,27450,3.78,26350,28000,2638,6.739 M,186.014 B
3,ددانا - داروسازي دانا,26050,(1.33),26100,(1.14),25750,26850,2246,1.419 M,36.977 B
4,شاوان - پالايش نفت لاوان,18280,(4.04),18140,(4.78),18130,18950,2207,10.331 M,188.831 B
5,فگستر - گسترش صنايع روي ايرانيان,8550,(0.7),8730,1.39,8180,8820,1972,6.847 M,58.550 B
6,كرمان - س. توسعه و عمران استان كرمان,783,(1.14),784,(1.01),775,806,1952,87.261 M,68.362 B


Unnamed: 0,تاریخ,ارزش بازار
0,01/5/25,"10,537,681.997 B"
1,01/5/24,"10,583,197.057 B"
2,01/5/23,"10,593,035.726 B"
3,01/5/22,"10,627,855.637 B"
4,01/5/19,"10,563,737.201 B"
5,01/5/18,"10,564,243.545 B"
6,01/5/15,"10,562,549.878 B"


Unnamed: 0,نماد,قیمت پایانی,تاثیر
0,زاگرس,105750,(18.93)
1,آريا,71700,(13.71)
2,شاوان,18280,(11.17)
3,شراز,11910,(10.97)
4,بپاس,26000,10.55
5,دماوند,18230,7.1
6,كگهر,43750,(4.53)


Unnamed: 0,نماد,قیمت پایانی,قیمت پایانی.1
0,نوين,1427,5.39
1,وآوا,2742,5.38
2,كتوكا,6940,5.31
3,فروي,13080,5.23
4,شتوكا,12540,5.03
5,كايزد,16010,4.91
6,مفاخر,45550,4.83


Unnamed: 0,نماد,قیمت پایانی,قیمت پایانی.1
0,ولشرق,2055,(4.82)
1,وگستر,2910,(4.65)
2,شراز,11910,(4.41)
3,ريشمك,17090,(4.36)
4,شبصير,96200,(4.33)
5,تاپكيش,11360,(4.22)
6,شاوان,18280,(4.04)


Unnamed: 0,نماد,آخرین معامله,آخرین معامله.1
0,وتعاون,1192,4.38
1,ميهن,2733,3.25
2,ريشمك,17580,2.87
3,هجرت,17430,2.17
4,دتوليد,6770,2.11
5,فگستر,8730,2.11
6,قشير,5532,1.99


Unnamed: 0,نماد,آخرین معامله,آخرین معامله.1
0,شرانل,44100,(2.97)
1,دتوزيع,28000,(2.95)
2,سغدير,13800,(2.2)
3,بهپاك,6300,(2.17)
4,كزغال,15710,(2.06)
5,حريل,3157,(1.68)
6,فتوسا,7740,(1.65)


Unnamed: 0,نماد,قیمت پایانی,قیمت پایانی.1


Unnamed: 0,نماد,قیمت,حجم,ارزش,تعداد
0,اعتماد,46681,45.303 M,"2,114.791 B",454
1,گام020361,834000,1.942 M,"1,619.878 B",79
2,ثبات,12184,124.783 M,"1,520.362 B",1250
3,آوند,10137,100.015 M,"1,013.851 B",6
4,سپيدما,13235,50.000 M,661.750 B,500
5,كارا,11966,51.070 M,611.101 B,6
6,آساميد,15595,28.229 M,440.235 B,283


Unnamed: 0,نماد,قیمت,حجم,ارزش,تعداد
0,اوصتا,23843,14.942 M,356.250 B,1
1,اراد42,1000000,237000,237.000 B,10
2,آوند,10138,20.157 M,204.352 B,2
3,دارا,10145,19.909 M,201.980 B,1
4,كارا,11967,14.178 M,169.664 B,10
5,آكورد,40760,4.127 M,168.226 B,43
6,آساميد,15605,10.000 M,156.050 B,100


Unnamed: 0,نماد,تاریخ,تعدیل شده,قبل از تعدیل
0,قچار,1401/5/24,10660,10810
1,شصدف,1401/5/22,32250,34400
2,شراز,1401/5/15,14140,15990
3,تبرك,1401/5/15,6330,6530
4,فماك,1401/5/12,19234,19290
5,گدنا,1401/5/1,12550,12601
6,فولاي,1401/5/1,9370,9670


Unnamed: 0,نماد,تاریخ,سهام جدید,سهام قبلی
0,دبالك,1401/5/18,770.000 M,520.000 M
1,اتكاي,1401/5/18,8.000 B,5.200 B
2,اپرداز,1401/5/10,2.250 B,"1,000.000 M"
3,شراز,1401/5/3,11.000 B,1.028 B
4,بخاور,1401/5/3,2.400 B,2.200 B
...,...,...,...,...
657,كارآفريني,1389/7/6,648.000 M,23.300 M
658,پترو گچساران,1389/7/6,259.700 M,100000
659,فريم,1389/6/8,615256,196.000 M
660,نكا,1389/6/8,1.782 M,4.900 M


Unnamed: 0,نماد,ارزش بازار,تغییر
0,بپاس,"1,512,529 B","5,817.420 B"
1,دماوند,"100,630 B","3,919.200 B"
2,بپيوند,"27,570 B","1,140.000 B"
3,پخش,"50,330 B",770.000 B
4,رنيك,"21,558 B",714.100 B
5,حخزر,"18,598 B",648.000 B
6,سرچشمه,"32,015 B",596.700 B


Unnamed: 0,نماد,تعداد,حجم,ارزش,ارزش بازار
0,اراد864,2,60.000 M,"57,865.500 B","96,443 B"
1,اراد954,2,20.000 M,"19,167.690 B","47,919 B"
2,گام020361,409,10.000 M,"8,340.000 B","12,510 B"
3,اخزا9024,3,7.977 M,"7,365.053 B","46,162 B"
4,اراد404,2,6.200 M,"5,977.532 B","38,565 B"
5,افاد44,2,6.012 M,"5,977.521 B","49,792 B"
6,گام0203614,8,5.000 M,"4,164.450 B","12,493 B"


Unnamed: 0,نماد,تعداد,حجم,ارزش,ارزش بازار
0,دي,7641,343.273 M,245.921 B,"96,660 B"
1,كارا2,28,269.000 M,269.000 M,100 M
2,حآفرين2,3,161.361 M,590.583 B,"27,186 B"
3,كارا,2047,121.627 M,"1,455.494 B","1,197 B"
4,كرمان,1952,87.261 M,68.362 B,"33,372 B"
5,ثبات,1668,83.689 M,"1,019.730 B","48,740 B"
6,سپيدما2,9,80.500 M,80.500 M,6 B


Unnamed: 0,نماد,ارزش بازار,تغییر
0,زاگرس,"253,800 B","10,440.000 B"
1,آريا,"985,984 B","7,563.336 B"
2,شاوان,"146,240 B","6,160.000 B"
3,شراز,"131,010 B","6,050.000 B"
4,كگهر,"1,093,750 B","2,500.000 B"
5,شبصير,"48,100 B","2,175.000 B"
6,شگويا,"232,150 B","1,916.613 B"


Unnamed: 0,نماد,قیمت پایانی,قیمت پایانی.1,آخرین معامله,آخرین معامله.1,کمترین,بیشترین,تعداد,حجم,ارزش
0,وسالت - بانك قرض الحسنه رسالت,46950,1.19,47750,2.91,47750,47750,101,323689,15.456 B
1,خاور - ايران خودرو ديزل,1867,2.98,1867,2.98,1837,1867,1947,110.466 M,206.216 B
2,تفارس - تامين سرمايه خليج فارس,6660,(0.75),6710,0,6550,6790,1501,24.553 M,163.578 B
3,شلرد - كود شيميايي اوره لردگان,10110,(2.32),10140,(2.03),10040,10350,1393,15.528 M,156.975 B
4,شپلي - پلي اكريل ايران,6310,(0.94),6310,(0.94),6210,6440,616,6.390 M,40.302 B
5,وبرق - س.کارکنان صنعت برق زنجان وقزوي,59150,(0.92),58600,(1.84),58400,60600,622,482784,28.567 B
6,سنوين - سرمايه گذاري اقتصاد نوين,4657,1.04,4747,2.99,4747,4747,25,772543,3.667 B


Unnamed: 0,نماد,قیمت پایانی,قیمت پایانی.1,آخرین معامله,آخرین معامله.1,کمترین,بیشترین,تعداد,حجم,ارزش
0,شمواد - توليد مواداوليه الياف مصنوعي,73750,(1.99),73750,(1.99),73750,74700,350,1.444 M,106.523 B
1,فاهواز - نورد و لوله اهواز,4396,(0.88),4347,(1.98),4347,4435,106,1.404 M,6.139 B
2,لازما - كارخانه هاي صنعتي آزمايش,22050,(1.78),22050,(1.78),22050,22400,89,557482,12.301 B
3,وشمال - س. چشم انداز توسعه شمال,21450,1.9,21450,1.9,21300,21450,90,374816,8.038 B
4,سفارود - كارخانه فارسيت درود,8240,(0.48),8120,(1.93),8120,8120,25,161621,1.312 B
5,قجام - فرآوردههاي غذايي وقندتربت‌جام‌,5089,0.43,5156,1.76,4990,5159,626,5.662 M,28.813 B
6,وزمين - بانك ايران زمين,8600,0,8430,(1.98),8430,8430,14,20599,173.650 M


Unnamed: 0,نماد,قیمت پایانی,قیمت پایانی.1,آخرین معامله,آخرین معامله.1,کمترین,بیشترین,تعداد,حجم,ارزش
0,فسديد - لوله‌وتجهيزات‌ سديد - ورشكسته,57500,0.0,56950,(0.96),56950,56950,3,357,20.331 M
1,سپرمي - پرميت‌,38850,0.91,38850,0.91,38850,38850,42,138317,5.374 B


Unnamed: 0,نماد,قیمت پایانی,قیمت پایانی.1,آخرین معامله,آخرین معامله.1,کمترین,بیشترین,تعداد,حجم,ارزش
0,غنيلي - مجتمع توليدي نيلي صنعت كرمان,4760,0,4760,0,0,0,0,0,0
1,غنيليح - ح.مجتمع توليدي نيلي صنعت كرمان,1020,0,1000,(1.96),0,0,0,0,0
2,شگامرن - مجتمع پترو صنعت گامرون,268283,0,268283,0,0,0,0,0,0
3,سفاروم - سيمان سفيد اروميه,1000,0,1000,0,0,0,0,0,0
4,كيا - كيا الكترود شرق,7661,0,7661,0,0,0,0,0,0
5,غكولاك - كولاك‌ شرق‌,1000,0,1000,0,0,0,0,0,0
6,منارا - ايران نارا,1000,0,1000,0,0,0,0,0,0


Unnamed: 0,نماد,قیمت پایانی,قیمت پایانی.1,آخرین معامله,آخرین معامله.1,کمترین,بیشترین,تعداد,حجم,ارزش
0,شسيماب - سيماب رزين,1000,0,1000,0,0,0,0,0,0
1,غگز - گز سكه,8100,0,8100,0,0,0,0,0,0


Unnamed: 0,نماد,قیمت,قیمت.1,تعداد,حجم,ارزش
0,اختيارخ تفارس-7500-14020629 (ضتفارس607),990,1.85,7,127,125.710 M
1,اختيارخ تفارس-7000-14020629 (ضتفارس606),1201,(13.6),7,144,172.990 M
2,اختيارخ فرابورس-9901-14011014 (ضفرابورس1000),1660,10.67,2,21,34.850 M
3,اختيارف حافرين-5000-14011214 (طحافرين1208),1173,41.84,2,101,118.500 M
4,تامين مالي جمعي صرفا اهداي سود (كرونا2),10000,0,1,30,300000
5,اختيارخ فرابورس-11901-14011014 (ضفرابورس1001),850,(46.57),1,1,850000


Unnamed: 0,نماد,قیمت,حجم,ارزش,تعداد
0,ضتفارس600,3520,100,352000,1
1,ضتفارس601,3120,100,312000,1
2,ضتفارس602,2720,100,272000,1
3,طحافرين1208,310,800,248000,8
4,ضتفارس603,2310,100,231000,1
5,ضتفارس607,410,400,164000,4
6,ضحافرين1200,2201,50,110050,1


Unnamed: 0,نماد,قیمت,حجم,ارزش,تعداد
0,ضتفارس600,4048,100,404800,1
1,ضتفارس601,3381,100,338100,1
2,ضتفارس602,3041,100,304100,1
3,ضتفارس603,2702,100,270200,1
4,ضتفارس604,2362,100,236200,1
5,ضوسپهر300,8250,25,206250,1
6,ضتفارس605,1921,100,192100,1


Unnamed: 0.1,Unnamed: 0,علت
0,بانك دي (دي),مشمول ماده 141 قانون تجارت به مدت 4 سال مالي م...
1,اعتباري ملل (وملل),عدم رعایت الزامات دستورالعمل اجرایی افشای اطلا...
2,بيمه ميهن (ميهن),عدم رعایت الزامات دستورالعمل اجرایی افشای اطلا...
3,قند شيروان قوچان و بجنورد (قشير),عدم رعایت الزامات دستورالعمل اجرایی افشای اطلا...


Unnamed: 0.1,Unnamed: 0,علت


Unnamed: 0,نماد,علت
0,عمران و توسعه شاهد (ثعمرا),عدم رعایت الزامات دستورالعمل اجرایی افشای اطلا...
1,بانك حكمت ايرانيان (حكمت),نیاز به بررسی وضعیت شفافیت اطلاعاتی ناشر
2,پتروشيمي كازرون (كازرو),نیاز به بررسی وضعیت شفافیت اطلاعاتی ناشر
3,پتروشيمي ممسني (ممسني),نیاز به بررسی وضعیت شفافیت اطلاعاتی ناشر
4,پتروشيمي مارون (مارون),نیاز به بررسی وضعیت شفافیت اطلاعاتی ناشر
5,صنايع پتروشيمي دهدشت (دهدشت),نیاز به بررسی وضعیت شفافیت اطلاعاتی ناشر
6,بازرگاني آينده سازان بهشت پارس (آينده),نیاز به بررسی وضعیت شفافیت اطلاعاتی ناشر
7,اعتباري توسعه (توسعه),نیاز به بررسی وضعیت شفافیت اطلاعاتی ناشر
8,كيوان (غيوان),ظنّ وقوع جرائم موضوع ماده 46 قانون بازار اوراق...
9,بانك آينده (وآيند),نیاز به بررسی وضعیت شفافیت اطلاعاتی ناشر


# Saving DataFrames

In [32]:
df.to_csv("my_df.csv")
!cat my_df.csv

,ID,Name,Family,Age,Birth Place
0,1233,Mahyar,Riazati,23,Tehran
1,1234,Sajjad,Yazdan Parast,24,No Place
2,1250,Ali,Babaii,20,Tehran
3,123468,Poorya,ZamanVaziri,22,Tehran
4,456897,Amin,Anvari,No Age,Tehran
5,987456,No Name,Tabrizi,28,Tabriz


In [33]:
pd.read_csv("my_df.csv", na_values=["No Age", "No Place", "No Name"])

Unnamed: 0.1,Unnamed: 0,ID,Name,Family,Age,Birth Place
0,0,1233,Mahyar,Riazati,23.0,Tehran
1,1,1234,Sajjad,Yazdan Parast,24.0,
2,2,1250,Ali,Babaii,20.0,Tehran
3,3,123468,Poorya,ZamanVaziri,22.0,Tehran
4,4,456897,Amin,Anvari,,Tehran
5,5,987456,,Tabrizi,28.0,Tabriz


# Pandas Arethmatics & Operations

## Simple Arethmatics

In [34]:
s1 = pd.Series([1, 2, 3], index=list("ABC"))
s2 = pd.Series([4, 5, 6], index=list("ABC"))
s1 + s2

A    5
B    7
C    9
dtype: int64

In [35]:
s1 = pd.Series([1, 2, 3], index=list("ABC"))
s2 = pd.Series([4, 5, 6], index=list("AED"))
s1 + s2
# s1.add(s2, fill_value=0)

A    5.0
B    NaN
C    NaN
D    NaN
E    NaN
dtype: float64

In [36]:
df1 = pd.DataFrame(
    data    = np.arange(1200).reshape((400, 3)),
    columns = ["COL_1", "COL_5", "COL_8"],
    index   = [x + 1 for x in range(400)]
)

df2 = pd.DataFrame(
    data    = np.arange(1200).reshape((400, 3)),
    columns = ["COL_1", "COL_2", "COL_3"],
    index   = [x + 1 for x in range(400)]
)

df1.add(df2, fill_value=0)

Unnamed: 0,COL_1,COL_2,COL_3,COL_5,COL_8
1,0,1.0,2.0,1.0,2.0
2,6,4.0,5.0,4.0,5.0
3,12,7.0,8.0,7.0,8.0
4,18,10.0,11.0,10.0,11.0
5,24,13.0,14.0,13.0,14.0
...,...,...,...,...,...
396,2370,1186.0,1187.0,1186.0,1187.0
397,2376,1189.0,1190.0,1189.0,1190.0
398,2382,1192.0,1193.0,1192.0,1193.0
399,2388,1195.0,1196.0,1195.0,1196.0


In [37]:
df1.subtract(df2, fill_value=0)

Unnamed: 0,COL_1,COL_2,COL_3,COL_5,COL_8
1,0,-1.0,-2.0,1.0,2.0
2,0,-4.0,-5.0,4.0,5.0
3,0,-7.0,-8.0,7.0,8.0
4,0,-10.0,-11.0,10.0,11.0
5,0,-13.0,-14.0,13.0,14.0
...,...,...,...,...,...
396,0,-1186.0,-1187.0,1186.0,1187.0
397,0,-1189.0,-1190.0,1189.0,1190.0
398,0,-1192.0,-1193.0,1192.0,1193.0
399,0,-1195.0,-1196.0,1195.0,1196.0


In [38]:
df1.divide(df2, fill_value=0)

Unnamed: 0,COL_1,COL_2,COL_3,COL_5,COL_8
1,,0.0,0.0,inf,inf
2,1.0,0.0,0.0,inf,inf
3,1.0,0.0,0.0,inf,inf
4,1.0,0.0,0.0,inf,inf
5,1.0,0.0,0.0,inf,inf
...,...,...,...,...,...
396,1.0,0.0,0.0,inf,inf
397,1.0,0.0,0.0,inf,inf
398,1.0,0.0,0.0,inf,inf
399,1.0,0.0,0.0,inf,inf


(`mul`, `mod`, `pow`)

## Setting Index 

### Time Indexing

In [39]:
nasdaq_df = pd.read_csv("./HistoricalPrices.csv")
nasdaq_df

nasdaq_df.Date = pd.to_datetime(nasdaq_df.Date)
display(nasdaq_df)
display(nasdaq_df.Date)

Unnamed: 0,Date,Open,High,Low,Close
0,2022-08-16,13082.64,13181.09,12979.24,13102.55
1,2022-08-15,12996.63,13146.06,12993.78,13128.05
2,2022-08-12,12866.31,13047.19,12821.22,13047.19
3,2022-08-11,12944.82,13026.24,12760.09,12779.91
4,2022-08-10,12793.44,12861.44,12698.61,12854.80
...,...,...,...,...,...
56,2022-05-25,11225.03,11511.90,11211.85,11434.74
57,2022-05-24,11326.44,11351.61,11092.48,11264.45
58,2022-05-23,11396.28,11552.07,11304.56,11535.27
59,2022-05-20,11542.67,11552.21,11035.69,11354.62


0    2022-08-16
1    2022-08-15
2    2022-08-12
3    2022-08-11
4    2022-08-10
        ...    
56   2022-05-25
57   2022-05-24
58   2022-05-23
59   2022-05-20
60   2022-05-19
Name: Date, Length: 61, dtype: datetime64[ns]

In [40]:
nasdaq_df.set_index("Date", inplace=True)
nasdaq_df

Unnamed: 0_level_0,Open,High,Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-08-16,13082.64,13181.09,12979.24,13102.55
2022-08-15,12996.63,13146.06,12993.78,13128.05
2022-08-12,12866.31,13047.19,12821.22,13047.19
2022-08-11,12944.82,13026.24,12760.09,12779.91
2022-08-10,12793.44,12861.44,12698.61,12854.80
...,...,...,...,...
2022-05-25,11225.03,11511.90,11211.85,11434.74
2022-05-24,11326.44,11351.61,11092.48,11264.45
2022-05-23,11396.28,11552.07,11304.56,11535.27
2022-05-20,11542.67,11552.21,11035.69,11354.62


In [41]:
nasdaq_df.loc["2022-06"]

Unnamed: 0_level_0,Open,High,Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-06-30,11048.25,11160.92,10850.01,11028.74
2022-06-29,11160.22,11226.33,11072.19,11177.89
2022-06-28,11542.24,11635.85,11177.68,11181.54
2022-06-27,11661.02,11677.49,11487.07,11524.55
2022-06-24,11351.31,11613.23,11337.78,11607.62
2022-06-23,11137.68,11260.27,11046.28,11232.19
2022-06-22,10941.95,11216.77,10938.06,11053.08
2022-06-21,10974.05,11164.99,10974.05,11069.3
2022-06-17,10697.55,10884.71,10638.72,10798.35
2022-06-16,10806.02,10831.07,10565.14,10646.1


In [42]:
nasdaq_df.loc["2022-06-08":"2022-06-15"]

Unnamed: 0_level_0,Open,High,Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-06-15,10968.4,11244.26,10866.39,11099.15
2022-06-14,10897.43,10926.81,10733.04,10828.35
2022-06-13,10986.84,11071.48,10775.14,10809.23
2022-06-10,11543.88,11569.15,11328.27,11340.02
2022-06-09,12016.47,12115.07,11751.98,11754.23
2022-06-08,12147.28,12235.78,12052.7,12086.27


In [43]:
nasdaq_df.loc["2022-06-08":"2022-08-15"]

Unnamed: 0_level_0,Open,High,Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-08-15,12996.63,13146.06,12993.78,13128.05
2022-08-12,12866.31,13047.19,12821.22,13047.19
2022-08-11,12944.82,13026.24,12760.09,12779.91
2022-08-10,12793.44,12861.44,12698.61,12854.8
2022-08-09,12557.49,12582.91,12438.86,12493.93
2022-08-08,12703.72,12855.16,12597.75,12644.46
2022-08-05,12538.81,12720.44,12525.77,12657.55
2022-08-04,12433.87,12699.64,12425.21,12720.58
2022-08-03,12433.87,12699.64,12425.21,12668.16
2022-08-02,12287.67,12503.34,12260.48,12348.76


## Sorting

In [44]:
nasdaq_df = nasdaq_df.sample(frac=1)
nasdaq_df

Unnamed: 0_level_0,Open,High,Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-07-01,11006.83,11132.55,10922.71,11127.85
2022-05-26,11409.84,11796.97,11406.16,11740.65
2022-07-19,11515.00,11721.22,11448.97,11713.15
2022-06-08,12147.28,12235.78,12052.70,12086.27
2022-08-04,12433.87,12699.64,12425.21,12720.58
...,...,...,...,...
2022-06-28,11542.24,11635.85,11177.68,11181.54
2022-08-02,12287.67,12503.34,12260.48,12348.76
2022-06-10,11543.88,11569.15,11328.27,11340.02
2022-05-31,12137.54,12190.08,11942.50,12081.39


In [45]:
nasdaq_df.columns

Index([' Open', ' High', ' Low', ' Close'], dtype='object')

In [46]:
nasdaq_df.columns = [x.strip() for x in nasdaq_df.columns]
nasdaq_df.columns

Index(['Open', 'High', 'Low', 'Close'], dtype='object')

### By Index

In [47]:
nasdaq_df.sort_index(ascending=True) # Default

Unnamed: 0_level_0,Open,High,Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-05-19,11364.40,11562.82,11313.31,11388.50
2022-05-20,11542.67,11552.21,11035.69,11354.62
2022-05-23,11396.28,11552.07,11304.56,11535.27
2022-05-24,11326.44,11351.61,11092.48,11264.45
2022-05-25,11225.03,11511.90,11211.85,11434.74
...,...,...,...,...
2022-08-10,12793.44,12861.44,12698.61,12854.80
2022-08-11,12944.82,13026.24,12760.09,12779.91
2022-08-12,12866.31,13047.19,12821.22,13047.19
2022-08-15,12996.63,13146.06,12993.78,13128.05


In [48]:
nasdaq_df.sort_index(ascending=False)

Unnamed: 0_level_0,Open,High,Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-08-16,13082.64,13181.09,12979.24,13102.55
2022-08-15,12996.63,13146.06,12993.78,13128.05
2022-08-12,12866.31,13047.19,12821.22,13047.19
2022-08-11,12944.82,13026.24,12760.09,12779.91
2022-08-10,12793.44,12861.44,12698.61,12854.80
...,...,...,...,...
2022-05-25,11225.03,11511.90,11211.85,11434.74
2022-05-24,11326.44,11351.61,11092.48,11264.45
2022-05-23,11396.28,11552.07,11304.56,11535.27
2022-05-20,11542.67,11552.21,11035.69,11354.62


### By Columns

In [49]:
nasdaq_df.sort_values("High")

Unnamed: 0_level_0,Open,High,Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-06-16,10806.02,10831.07,10565.14,10646.10
2022-06-17,10697.55,10884.71,10638.72,10798.35
2022-06-14,10897.43,10926.81,10733.04,10828.35
2022-06-13,10986.84,11071.48,10775.14,10809.23
2022-07-01,11006.83,11132.55,10922.71,11127.85
...,...,...,...,...
2022-08-10,12793.44,12861.44,12698.61,12854.80
2022-08-11,12944.82,13026.24,12760.09,12779.91
2022-08-12,12866.31,13047.19,12821.22,13047.19
2022-08-15,12996.63,13146.06,12993.78,13128.05


In [50]:
nasdaq_df.sort_values(["High", "Low"])

Unnamed: 0_level_0,Open,High,Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-06-16,10806.02,10831.07,10565.14,10646.10
2022-06-17,10697.55,10884.71,10638.72,10798.35
2022-06-14,10897.43,10926.81,10733.04,10828.35
2022-06-13,10986.84,11071.48,10775.14,10809.23
2022-07-01,11006.83,11132.55,10922.71,11127.85
...,...,...,...,...
2022-08-10,12793.44,12861.44,12698.61,12854.80
2022-08-11,12944.82,13026.24,12760.09,12779.91
2022-08-12,12866.31,13047.19,12821.22,13047.19
2022-08-15,12996.63,13146.06,12993.78,13128.05


## Statistics

### Describing

In [53]:
car_csv = pd.read_csv("./cars_raw.csv")
car_csv.describe()

Unnamed: 0,Year,ConsumerRating,ConsumerReviews,SellerRating,SellerReviews,ComfortRating,InteriorDesignRating,PerformanceRating,ValueForMoneyRating,ExteriorStylingRating,ReliabilityRating,MinMPG,MaxMPG,Mileage
count,9379.0,9379.0,9379.0,9379.0,9379.0,9379.0,9379.0,9379.0,9379.0,9379.0,9379.0,9379.0,9379.0,9379.0
mean,2018.721719,4.702825,133.187014,4.412571,984.089988,4.771895,4.727391,4.69629,4.537083,4.782194,4.681746,22.755411,29.216548,37463.02335
std,2.221708,0.240795,154.98564,0.626258,1609.039864,0.217822,0.194391,0.253664,0.338098,0.171537,0.368161,14.812869,12.809783,24970.342569
min,2001.0,2.5,1.0,1.0,1.0,3.0,2.8,1.0,1.0,3.0,1.0,0.0,0.0,121.0
25%,2018.0,4.7,30.0,4.3,112.0,4.7,4.7,4.6,4.5,4.7,4.6,18.0,25.0,18666.5
50%,2019.0,4.8,75.0,4.6,542.0,4.8,4.8,4.7,4.6,4.8,4.8,20.0,27.0,32907.0
75%,2020.0,4.8,182.0,4.8,1272.0,4.9,4.8,4.8,4.7,4.9,4.9,24.0,31.0,47698.0
max,2022.0,5.0,817.0,5.0,27824.0,5.0,5.0,5.0,5.0,5.0,5.0,150.0,133.0,234114.0


### Counting

In [55]:
car_csv["Make"].value_counts()

BMW              944
Mercedes-Benz    810
Toyota           797
Honda            713
Ford             580
Jeep             495
Lexus            484
Audi             424
Chevrolet        416
Subaru           310
Cadillac         299
Nissan           251
GMC              250
Kia              240
Acura            231
INFINITI         216
Hyundai          212
Mazda            205
Tesla            186
Land             174
Volvo            148
RAM              145
Dodge            145
Volkswagen       140
Porsche          134
Lincoln          120
Buick            114
Chrysler          39
Alfa              38
Jaguar            30
Mitsubishi        20
Genesis           19
Maserati          15
Lamborghini       14
Bentley            6
MINI               5
Scion              4
FIAT               2
Mercury            2
Saturn             1
Ferrari            1
Name: Make, dtype: int64

In [56]:
car_csv["InteriorColor"].value_counts()

Black             3758
–                 1088
Jet Black          486
Gray               420
Ebony              412
                  ... 
Amaro Brown          1
Cinnamon Brown       1
Ivory / Blue         1
Oyster/Black         1
Hotspur              1
Name: InteriorColor, Length: 374, dtype: int64

### Mean

In [57]:
car_csv.mean()

  car_csv.mean()


Year                      2018.721719
ConsumerRating               4.702825
ConsumerReviews            133.187014
SellerRating                 4.412571
SellerReviews              984.089988
ComfortRating                4.771895
InteriorDesignRating         4.727391
PerformanceRating            4.696290
ValueForMoneyRating          4.537083
ExteriorStylingRating        4.782194
ReliabilityRating            4.681746
MinMPG                      22.755411
MaxMPG                      29.216548
Mileage                  37463.023350
dtype: float64

### Median

In [58]:
car_csv.median()

  car_csv.median()


Year                      2019.0
ConsumerRating               4.8
ConsumerReviews             75.0
SellerRating                 4.6
SellerReviews              542.0
ComfortRating                4.8
InteriorDesignRating         4.8
PerformanceRating            4.7
ValueForMoneyRating          4.6
ExteriorStylingRating        4.8
ReliabilityRating            4.8
MinMPG                      20.0
MaxMPG                      27.0
Mileage                  32907.0
dtype: float64

In [60]:
car_csv["ConsumerRating"].median()

4.8

### Variance

In [61]:
car_csv.var()

  car_csv.var()


Year                     4.935987e+00
ConsumerRating           5.798199e-02
ConsumerReviews          2.402055e+04
SellerRating             3.921996e-01
SellerReviews            2.589009e+06
ComfortRating            4.744630e-02
InteriorDesignRating     3.778772e-02
PerformanceRating        6.434537e-02
ValueForMoneyRating      1.143104e-01
ExteriorStylingRating    2.942487e-02
ReliabilityRating        1.355422e-01
MinMPG                   2.194211e+02
MaxMPG                   1.640906e+02
Mileage                  6.235180e+08
dtype: float64

### Covariance

In [62]:
car_csv.cov()

Unnamed: 0,Year,ConsumerRating,ConsumerReviews,SellerRating,SellerReviews,ComfortRating,InteriorDesignRating,PerformanceRating,ValueForMoneyRating,ExteriorStylingRating,ReliabilityRating,MinMPG,MaxMPG,Mileage
Year,4.935987,0.04988,-46.398581,0.131116,219.2569,0.070393,0.092641,0.063875,0.029568,0.053319,0.014807,3.721755,2.807122,-36730.65
ConsumerRating,0.04988,0.057982,2.823176,0.005864,14.33632,0.045107,0.036686,0.04922,0.074718,0.032866,0.081078,-1.172949,-0.9038,-315.1208
ConsumerReviews,-46.398581,2.823176,24020.548515,0.498662,18203.63,1.280443,0.423433,-1.63956,8.51614,0.337634,6.076564,-43.325655,-8.106507,447244.4
SellerRating,0.131116,0.005864,0.498662,0.3922,276.9727,0.006347,0.005781,0.005104,0.006505,0.003539,0.005687,-0.205989,-0.234371,-1287.962
SellerReviews,219.256864,14.336321,18203.626163,276.972701,2589009.0,15.57842,12.758017,10.237965,20.523342,9.856763,16.665153,-127.118849,-151.472037,-2430179.0
ComfortRating,0.070393,0.045107,1.280443,0.006347,15.57842,0.047446,0.032749,0.034482,0.054673,0.025003,0.060656,-1.199859,-0.981704,-316.3115
InteriorDesignRating,0.092641,0.036686,0.423433,0.005781,12.75802,0.032749,0.037788,0.03178,0.041903,0.025479,0.04157,-0.8581,-0.678655,-654.8197
PerformanceRating,0.063875,0.04922,-1.63956,0.005104,10.23796,0.034482,0.03178,0.064345,0.057836,0.026875,0.062373,-0.214695,-0.135985,-576.3879
ValueForMoneyRating,0.029568,0.074718,8.51614,0.006505,20.52334,0.054673,0.041903,0.057836,0.11431,0.039722,0.11124,-1.4276,-1.056901,-58.98514
ExteriorStylingRating,0.053319,0.032866,0.337634,0.003539,9.856763,0.025003,0.025479,0.026875,0.039722,0.029425,0.040261,-0.972238,-0.763365,-435.2813


### Corrolation

In [63]:
car_csv.corr()

Unnamed: 0,Year,ConsumerRating,ConsumerReviews,SellerRating,SellerReviews,ComfortRating,InteriorDesignRating,PerformanceRating,ValueForMoneyRating,ExteriorStylingRating,ReliabilityRating,MinMPG,MaxMPG,Mileage
Year,1.0,0.093238,-0.134749,0.094236,0.061334,0.145459,0.214507,0.11334,0.039363,0.139907,0.018102,0.113089,0.098635,-0.66209
ConsumerRating,0.093238,1.0,0.075648,0.038889,0.037002,0.859998,0.783758,0.805822,0.917777,0.7957,0.91458,-0.328847,-0.293011,-0.052409
ConsumerReviews,-0.134749,0.075648,1.0,0.005138,0.072996,0.037929,0.014055,-0.041704,0.162521,0.0127,0.106495,-0.018872,-0.004083,0.115566
SellerRating,0.094236,0.038889,0.005138,1.0,0.274863,0.046529,0.047484,0.032131,0.030724,0.032944,0.024665,-0.022205,-0.029215,-0.082362
SellerReviews,0.061334,0.037002,0.072996,0.274863,1.0,0.044448,0.040789,0.025084,0.037726,0.035712,0.028132,-0.005333,-0.007349,-0.060485
ComfortRating,0.145459,0.859998,0.037929,0.046529,0.044448,1.0,0.773431,0.624068,0.742387,0.669161,0.756367,-0.371869,-0.351834,-0.058155
InteriorDesignRating,0.214507,0.783758,0.014055,0.047484,0.040789,0.773431,1.0,0.644496,0.637564,0.764106,0.580848,-0.298005,-0.272541,-0.134903
PerformanceRating,0.11334,0.805822,-0.041704,0.032131,0.025084,0.624068,0.644496,1.0,0.674373,0.617628,0.667884,-0.057138,-0.041849,-0.090998
ValueForMoneyRating,0.039363,0.917777,0.162521,0.030724,0.037726,0.742387,0.637564,0.674373,1.0,0.684906,0.893677,-0.285052,-0.244034,-0.006987
ExteriorStylingRating,0.139907,0.7957,0.0127,0.032944,0.035712,0.669161,0.764106,0.617628,0.684906,1.0,0.637521,-0.382627,-0.347403,-0.101622


## Handling Missing Values

In [64]:
df = pd.read_csv("train.csv")
df

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,1,60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,...,0,,,,0,2,2008,WD,Normal,208500
1,2,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,...,0,,,,0,5,2007,WD,Normal,181500
2,3,60,RL,68.0,11250,Pave,,IR1,Lvl,AllPub,...,0,,,,0,9,2008,WD,Normal,223500
3,4,70,RL,60.0,9550,Pave,,IR1,Lvl,AllPub,...,0,,,,0,2,2006,WD,Abnorml,140000
4,5,60,RL,84.0,14260,Pave,,IR1,Lvl,AllPub,...,0,,,,0,12,2008,WD,Normal,250000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1455,1456,60,RL,62.0,7917,Pave,,Reg,Lvl,AllPub,...,0,,,,0,8,2007,WD,Normal,175000
1456,1457,20,RL,85.0,13175,Pave,,Reg,Lvl,AllPub,...,0,,MnPrv,,0,2,2010,WD,Normal,210000
1457,1458,70,RL,66.0,9042,Pave,,Reg,Lvl,AllPub,...,0,,GdPrv,Shed,2500,5,2010,WD,Normal,266500
1458,1459,20,RL,68.0,9717,Pave,,Reg,Lvl,AllPub,...,0,,,,0,4,2010,WD,Normal,142125


### Counting Missing Values

In [65]:
missing_values = df.isnull().sum()
missing_values = missing_values[missing_values > 0].sort_values(ascending=False)
missing_columns = missing_values.index
missing_values, missing_columns

(PoolQC          1453
 MiscFeature     1406
 Alley           1369
 Fence           1179
 FireplaceQu      690
 LotFrontage      259
 GarageType        81
 GarageYrBlt       81
 GarageFinish      81
 GarageQual        81
 GarageCond        81
 BsmtExposure      38
 BsmtFinType2      38
 BsmtFinType1      37
 BsmtCond          37
 BsmtQual          37
 MasVnrArea         8
 MasVnrType         8
 Electrical         1
 dtype: int64,
 Index(['PoolQC', 'MiscFeature', 'Alley', 'Fence', 'FireplaceQu', 'LotFrontage',
        'GarageType', 'GarageYrBlt', 'GarageFinish', 'GarageQual', 'GarageCond',
        'BsmtExposure', 'BsmtFinType2', 'BsmtFinType1', 'BsmtCond', 'BsmtQual',
        'MasVnrArea', 'MasVnrType', 'Electrical'],
       dtype='object'))

In [66]:
df.notnull().sum()

Id               1460
MSSubClass       1460
MSZoning         1460
LotFrontage      1201
LotArea          1460
                 ... 
MoSold           1460
YrSold           1460
SaleType         1460
SaleCondition    1460
SalePrice        1460
Length: 81, dtype: int64

### 1. Deleting Missing Values

#### Delet Whole Column

In [67]:
removed_column = df.drop(missing_columns[:6], axis=1)
display(removed_column, removed_column.columns)

Unnamed: 0,Id,MSSubClass,MSZoning,LotArea,Street,LotShape,LandContour,Utilities,LotConfig,LandSlope,...,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,1,60,RL,8450,Pave,Reg,Lvl,AllPub,Inside,Gtl,...,0,0,0,0,0,2,2008,WD,Normal,208500
1,2,20,RL,9600,Pave,Reg,Lvl,AllPub,FR2,Gtl,...,0,0,0,0,0,5,2007,WD,Normal,181500
2,3,60,RL,11250,Pave,IR1,Lvl,AllPub,Inside,Gtl,...,0,0,0,0,0,9,2008,WD,Normal,223500
3,4,70,RL,9550,Pave,IR1,Lvl,AllPub,Corner,Gtl,...,272,0,0,0,0,2,2006,WD,Abnorml,140000
4,5,60,RL,14260,Pave,IR1,Lvl,AllPub,FR2,Gtl,...,0,0,0,0,0,12,2008,WD,Normal,250000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1455,1456,60,RL,7917,Pave,Reg,Lvl,AllPub,Inside,Gtl,...,0,0,0,0,0,8,2007,WD,Normal,175000
1456,1457,20,RL,13175,Pave,Reg,Lvl,AllPub,Inside,Gtl,...,0,0,0,0,0,2,2010,WD,Normal,210000
1457,1458,70,RL,9042,Pave,Reg,Lvl,AllPub,Inside,Gtl,...,0,0,0,0,2500,5,2010,WD,Normal,266500
1458,1459,20,RL,9717,Pave,Reg,Lvl,AllPub,Inside,Gtl,...,112,0,0,0,0,4,2010,WD,Normal,142125


Index(['Id', 'MSSubClass', 'MSZoning', 'LotArea', 'Street', 'LotShape',
       'LandContour', 'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood',
       'Condition1', 'Condition2', 'BldgType', 'HouseStyle', 'OverallQual',
       'OverallCond', 'YearBuilt', 'YearRemodAdd', 'RoofStyle', 'RoofMatl',
       'Exterior1st', 'Exterior2nd', 'MasVnrType', 'MasVnrArea', 'ExterQual',
       'ExterCond', 'Foundation', 'BsmtQual', 'BsmtCond', 'BsmtExposure',
       'BsmtFinType1', 'BsmtFinSF1', 'BsmtFinType2', 'BsmtFinSF2', 'BsmtUnfSF',
       'TotalBsmtSF', 'Heating', 'HeatingQC', 'CentralAir', 'Electrical',
       '1stFlrSF', '2ndFlrSF', 'LowQualFinSF', 'GrLivArea', 'BsmtFullBath',
       'BsmtHalfBath', 'FullBath', 'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr',
       'KitchenQual', 'TotRmsAbvGrd', 'Functional', 'Fireplaces', 'GarageType',
       'GarageYrBlt', 'GarageFinish', 'GarageCars', 'GarageArea', 'GarageQual',
       'GarageCond', 'PavedDrive', 'WoodDeckSF', 'OpenPorchSF',
       'Enclos

In [68]:
removed_column.isnull().sum().sort_values(ascending=False), f"TOTAL ROWS: {len(removed_column)}"

(GarageType      81
 GarageCond      81
 GarageYrBlt     81
 GarageFinish    81
 GarageQual      81
                 ..
 BsmtUnfSF        0
 TotalBsmtSF      0
 MSSubClass       0
 HeatingQC        0
 SalePrice        0
 Length: 75, dtype: int64,
 'TOTAL ROWS: 1460')

#### Delete Rows

In [69]:
removed_rows = removed_column.dropna()
removed_rows

Unnamed: 0,Id,MSSubClass,MSZoning,LotArea,Street,LotShape,LandContour,Utilities,LotConfig,LandSlope,...,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,1,60,RL,8450,Pave,Reg,Lvl,AllPub,Inside,Gtl,...,0,0,0,0,0,2,2008,WD,Normal,208500
1,2,20,RL,9600,Pave,Reg,Lvl,AllPub,FR2,Gtl,...,0,0,0,0,0,5,2007,WD,Normal,181500
2,3,60,RL,11250,Pave,IR1,Lvl,AllPub,Inside,Gtl,...,0,0,0,0,0,9,2008,WD,Normal,223500
3,4,70,RL,9550,Pave,IR1,Lvl,AllPub,Corner,Gtl,...,272,0,0,0,0,2,2006,WD,Abnorml,140000
4,5,60,RL,14260,Pave,IR1,Lvl,AllPub,FR2,Gtl,...,0,0,0,0,0,12,2008,WD,Normal,250000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1455,1456,60,RL,7917,Pave,Reg,Lvl,AllPub,Inside,Gtl,...,0,0,0,0,0,8,2007,WD,Normal,175000
1456,1457,20,RL,13175,Pave,Reg,Lvl,AllPub,Inside,Gtl,...,0,0,0,0,0,2,2010,WD,Normal,210000
1457,1458,70,RL,9042,Pave,Reg,Lvl,AllPub,Inside,Gtl,...,0,0,0,0,2500,5,2010,WD,Normal,266500
1458,1459,20,RL,9717,Pave,Reg,Lvl,AllPub,Inside,Gtl,...,112,0,0,0,0,4,2010,WD,Normal,142125


### 2. Filling Missing Values

#### By Mean of Values

**NOTE**: Not Useful for strings

In [70]:
missing_values  = removed_column.isnull().sum()
missing_values  = missing_values[missing_values > 0]
missing_columns = missing_values.index
missing_columns

Index(['MasVnrType', 'MasVnrArea', 'BsmtQual', 'BsmtCond', 'BsmtExposure',
       'BsmtFinType1', 'BsmtFinType2', 'Electrical', 'GarageType',
       'GarageYrBlt', 'GarageFinish', 'GarageQual', 'GarageCond'],
      dtype='object')

In [71]:
removed_column.mean()

  removed_column.mean()


Id                  730.500000
MSSubClass           56.897260
LotArea           10516.828082
OverallQual           6.099315
OverallCond           5.575342
YearBuilt          1971.267808
YearRemodAdd       1984.865753
MasVnrArea          103.685262
BsmtFinSF1          443.639726
BsmtFinSF2           46.549315
BsmtUnfSF           567.240411
TotalBsmtSF        1057.429452
1stFlrSF           1162.626712
2ndFlrSF            346.992466
LowQualFinSF          5.844521
GrLivArea          1515.463699
BsmtFullBath          0.425342
BsmtHalfBath          0.057534
FullBath              1.565068
HalfBath              0.382877
BedroomAbvGr          2.866438
KitchenAbvGr          1.046575
TotRmsAbvGrd          6.517808
Fireplaces            0.613014
GarageYrBlt        1978.506164
GarageCars            1.767123
GarageArea          472.980137
WoodDeckSF           94.244521
OpenPorchSF          46.660274
EnclosedPorch        21.954110
3SsnPorch             3.409589
ScreenPorch          15.060959
PoolArea

In [72]:
fill_mode = removed_column.fillna(removed_column.mean())
display(fill_mode)
fill_mode.isnull().sum().sort_values()

  fill_mode = removed_column.fillna(removed_column.mean())


Unnamed: 0,Id,MSSubClass,MSZoning,LotArea,Street,LotShape,LandContour,Utilities,LotConfig,LandSlope,...,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,1,60,RL,8450,Pave,Reg,Lvl,AllPub,Inside,Gtl,...,0,0,0,0,0,2,2008,WD,Normal,208500
1,2,20,RL,9600,Pave,Reg,Lvl,AllPub,FR2,Gtl,...,0,0,0,0,0,5,2007,WD,Normal,181500
2,3,60,RL,11250,Pave,IR1,Lvl,AllPub,Inside,Gtl,...,0,0,0,0,0,9,2008,WD,Normal,223500
3,4,70,RL,9550,Pave,IR1,Lvl,AllPub,Corner,Gtl,...,272,0,0,0,0,2,2006,WD,Abnorml,140000
4,5,60,RL,14260,Pave,IR1,Lvl,AllPub,FR2,Gtl,...,0,0,0,0,0,12,2008,WD,Normal,250000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1455,1456,60,RL,7917,Pave,Reg,Lvl,AllPub,Inside,Gtl,...,0,0,0,0,0,8,2007,WD,Normal,175000
1456,1457,20,RL,13175,Pave,Reg,Lvl,AllPub,Inside,Gtl,...,0,0,0,0,0,2,2010,WD,Normal,210000
1457,1458,70,RL,9042,Pave,Reg,Lvl,AllPub,Inside,Gtl,...,0,0,0,0,2500,5,2010,WD,Normal,266500
1458,1459,20,RL,9717,Pave,Reg,Lvl,AllPub,Inside,Gtl,...,112,0,0,0,0,4,2010,WD,Normal,142125


Id                0
SaleCondition     0
HeatingQC         0
CentralAir        0
1stFlrSF          0
                 ..
BsmtFinType2     38
GarageQual       81
GarageCond       81
GarageFinish     81
GarageType       81
Length: 75, dtype: int64

In [73]:
removed_column.mode(axis="columns")

  warn(f"Unable to sort modes: {err}")
  warn(f"Unable to sort modes: {err}")
  warn(f"Unable to sort modes: {err}")
  warn(f"Unable to sort modes: {err}")


Unnamed: 0,0,1
0,0,
1,0.0,
2,0,
3,0.0,
4,0,
...,...,...
1455,0.0,
1456,0,
1457,0.0,
1458,0.0,


### Other Ways
- By `mode` (Most Repeated)
- By `median`

## Converting Categorical 

In [74]:
columns = removed_column.columns
str_columns = (removed_column.dtypes == object)
str_columns = str_columns[str_columns == True].index
removed_column[str_columns]

Unnamed: 0,MSZoning,Street,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,...,Electrical,KitchenQual,Functional,GarageType,GarageFinish,GarageQual,GarageCond,PavedDrive,SaleType,SaleCondition
0,RL,Pave,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,...,SBrkr,Gd,Typ,Attchd,RFn,TA,TA,Y,WD,Normal
1,RL,Pave,Reg,Lvl,AllPub,FR2,Gtl,Veenker,Feedr,Norm,...,SBrkr,TA,Typ,Attchd,RFn,TA,TA,Y,WD,Normal
2,RL,Pave,IR1,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,...,SBrkr,Gd,Typ,Attchd,RFn,TA,TA,Y,WD,Normal
3,RL,Pave,IR1,Lvl,AllPub,Corner,Gtl,Crawfor,Norm,Norm,...,SBrkr,Gd,Typ,Detchd,Unf,TA,TA,Y,WD,Abnorml
4,RL,Pave,IR1,Lvl,AllPub,FR2,Gtl,NoRidge,Norm,Norm,...,SBrkr,Gd,Typ,Attchd,RFn,TA,TA,Y,WD,Normal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1455,RL,Pave,Reg,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,...,SBrkr,TA,Typ,Attchd,RFn,TA,TA,Y,WD,Normal
1456,RL,Pave,Reg,Lvl,AllPub,Inside,Gtl,NWAmes,Norm,Norm,...,SBrkr,TA,Min1,Attchd,Unf,TA,TA,Y,WD,Normal
1457,RL,Pave,Reg,Lvl,AllPub,Inside,Gtl,Crawfor,Norm,Norm,...,SBrkr,Gd,Typ,Attchd,RFn,TA,TA,Y,WD,Normal
1458,RL,Pave,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,...,FuseA,Gd,Typ,Attchd,Unf,TA,TA,Y,WD,Normal


In [75]:
pd.get_dummies(removed_column[str_columns])

Unnamed: 0,MSZoning_C (all),MSZoning_FV,MSZoning_RH,MSZoning_RL,MSZoning_RM,Street_Grvl,Street_Pave,LotShape_IR1,LotShape_IR2,LotShape_IR3,...,SaleType_ConLw,SaleType_New,SaleType_Oth,SaleType_WD,SaleCondition_Abnorml,SaleCondition_AdjLand,SaleCondition_Alloca,SaleCondition_Family,SaleCondition_Normal,SaleCondition_Partial
0,0,0,0,1,0,0,1,0,0,0,...,0,0,0,1,0,0,0,0,1,0
1,0,0,0,1,0,0,1,0,0,0,...,0,0,0,1,0,0,0,0,1,0
2,0,0,0,1,0,0,1,1,0,0,...,0,0,0,1,0,0,0,0,1,0
3,0,0,0,1,0,0,1,1,0,0,...,0,0,0,1,1,0,0,0,0,0
4,0,0,0,1,0,0,1,1,0,0,...,0,0,0,1,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1455,0,0,0,1,0,0,1,0,0,0,...,0,0,0,1,0,0,0,0,1,0
1456,0,0,0,1,0,0,1,0,0,0,...,0,0,0,1,0,0,0,0,1,0
1457,0,0,0,1,0,0,1,0,0,0,...,0,0,0,1,0,0,0,0,1,0
1458,0,0,0,1,0,0,1,0,0,0,...,0,0,0,1,0,0,0,0,1,0



-------------------------

# Transforming

In [76]:
car_csv = pd.read_csv("./cars_raw.csv")
car_csv.Price.values

array(['$39,998', '$49,985', '$41,860', ..., '$26,944', '$28,568',
       '$32,091'], dtype=object)

In [77]:
def dummy_function_transform(x: pd.Series) -> pd.Series:
    try:
        return x + 1
    except:
        return x

In [78]:
car_csv.transform(dummy_function_transform)

Unnamed: 0,Year,Make,Model,Used/New,Price,ConsumerRating,ConsumerReviews,SellerType,SellerName,SellerRating,...,InteriorColor,Drivetrain,MinMPG,MaxMPG,FuelType,Transmission,Engine,VIN,Stock#,Mileage
0,2020,Toyota,Sienna SE,Used,"$39,998",5.6,46,Dealer,CarMax Murrieta - Now offering Curbside Pickup...,4.3,...,Black,Front-wheel Drive,20,28,Gasoline,8-Speed Automatic,3.5L V6 24V PDI DOHC,5TDXZ3DC2KS015402,22998646,29404
1,2019,Ford,F-150 Lariat,Used,"$49,985",5.8,818,Dealer,Giant Chevrolet,5.8,...,Black,Four-wheel Drive,20,25,Gasoline,10-Speed Automatic,3.5L V6 24V PDI DOHC Twin Turbo,1FTEW1EG2JFD44217,22418A,32930
2,2018,RAM,1500 Laramie,Used,"$41,860",5.7,496,Dealer,Gill Auto Group Madera,5.6,...,Black,Four-wheel Drive,16,22,Gasoline,8-Speed Automatic,5.7L V8 16V MPFI OHV,1C6RR7VT5HS842283,NG277871G,23174
3,2022,Honda,Accord Sport SE,Used,"$28,500",6.0,37,Dealer,AutoSavvy Las Vegas,5.6,...,–,Front-wheel Drive,30,36,Gasoline,Automatic CVT,1.5L I4 16V GDI DOHC Turbo,1HGCV1F49MA038035,54237,10599
4,2021,Lexus,RX 350,Used,"$49,000",5.8,77,Dealer,Lexus of Henderson,5.8,...,Birch,Front-wheel Drive,21,28,Gasoline,8-Speed Automatic,3.5L V6 24V PDI DOHC,2T2AZMAA8LC156270,HDT4181A,28138
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9374,2020,Subaru,Crosstrek 2.0i Premium,Used,"$27,374",5.7,206,Dealer,Bertera Subaru of West Springfield,5.4,...,Gray,All-wheel Drive,28,34,Gasoline,Automatic CVT,2.0L H4 16V GDI DOHC,JF2GTADC4KH318032,220502A,15607
9375,2020,Audi,Q8 3.0T Premium,Used,"$61,998",5.8,28,Dealer,Autobahn USA Westborough,5.8,...,Black,All-wheel Drive,18,23,Hybrid,8-Speed Automatic,3.0L V6 24V GDI DOHC Turbo Hybrid,WA1AVAF14KD015389,AB4719,46856
9376,2018,Buick,Enclave Leather,Used,"$26,944",5.8,138,Dealer,Tulley Automotive Group,5.7,...,Ebony,All-wheel Drive,16,23,Gasoline,6-Speed Automatic,3.6L V6 24V GDI DOHC,5GAKVBKD4HJ190334,B221381B,62650
9377,2020,Subaru,Forester Premium,Used,"$28,568",5.7,280,Dealer,Ira Subaru,5.4,...,Black,All-wheel Drive,27,34,Gasoline,Automatic CVT,2.5L H4 16V GDI DOHC,JF2SKAGC9KH423450,KH423450,30761


In [79]:
def price_parser(price: pd.Series) -> pd.Series:
    print(price)

    def f(x) -> float:
        if type(x) == float:
            return x
        try:
            return float("".join(x[1:].split(",")).strip())
        except:
            return None
        
    return price.transform(f)

transformed_price = car_csv.Price.transform(price_parser)
transformed_price

$39,998
0       $39,998
1       $49,985
2       $41,860
3       $28,500
4       $49,000
         ...   
9374    $27,374
9375    $61,998
9376    $26,944
9377    $28,568
9378    $32,091
Name: Price, Length: 9379, dtype: object


0       39998.0
1       49985.0
2       41860.0
3       28500.0
4       49000.0
         ...   
9374    27374.0
9375    61998.0
9376    26944.0
9377    28568.0
9378    32091.0
Name: Price, Length: 9379, dtype: float64

In [80]:
car_csv.Price = transformed_price

In [81]:
def f(x: pd.Series):
    return float(str(x).lstrip("$").replace(",", "").replace("Not Priced", "0"))

print(car_csv.Price.sort_values().values[-1])
# car_csv.Price.transform(f)

nan


In [82]:
car_csv

Unnamed: 0,Year,Make,Model,Used/New,Price,ConsumerRating,ConsumerReviews,SellerType,SellerName,SellerRating,...,InteriorColor,Drivetrain,MinMPG,MaxMPG,FuelType,Transmission,Engine,VIN,Stock#,Mileage
0,2019,Toyota,Sienna SE,Used,39998.0,4.6,45,Dealer,CarMax Murrieta - Now offering Curbside Pickup...,3.3,...,Black,Front-wheel Drive,19,27,Gasoline,8-Speed Automatic,3.5L V6 24V PDI DOHC,5TDXZ3DC2KS015402,22998646,29403
1,2018,Ford,F-150 Lariat,Used,49985.0,4.8,817,Dealer,Giant Chevrolet,4.8,...,Black,Four-wheel Drive,19,24,Gasoline,10-Speed Automatic,3.5L V6 24V PDI DOHC Twin Turbo,1FTEW1EG2JFD44217,22418A,32929
2,2017,RAM,1500 Laramie,Used,41860.0,4.7,495,Dealer,Gill Auto Group Madera,4.6,...,Black,Four-wheel Drive,15,21,Gasoline,8-Speed Automatic,5.7L V8 16V MPFI OHV,1C6RR7VT5HS842283,NG277871G,23173
3,2021,Honda,Accord Sport SE,Used,28500.0,5.0,36,Dealer,AutoSavvy Las Vegas,4.6,...,–,Front-wheel Drive,29,35,Gasoline,Automatic CVT,1.5L I4 16V GDI DOHC Turbo,1HGCV1F49MA038035,54237,10598
4,2020,Lexus,RX 350,Used,49000.0,4.8,76,Dealer,Lexus of Henderson,4.8,...,Birch,Front-wheel Drive,20,27,Gasoline,8-Speed Automatic,3.5L V6 24V PDI DOHC,2T2AZMAA8LC156270,HDT4181A,28137
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9374,2019,Subaru,Crosstrek 2.0i Premium,Used,27374.0,4.7,205,Dealer,Bertera Subaru of West Springfield,4.4,...,Gray,All-wheel Drive,27,33,Gasoline,Automatic CVT,2.0L H4 16V GDI DOHC,JF2GTADC4KH318032,220502A,15606
9375,2019,Audi,Q8 3.0T Premium,Used,61998.0,4.8,27,Dealer,Autobahn USA Westborough,4.8,...,Black,All-wheel Drive,17,22,Hybrid,8-Speed Automatic,3.0L V6 24V GDI DOHC Turbo Hybrid,WA1AVAF14KD015389,AB4719,46855
9376,2017,Buick,Enclave Leather,Used,26944.0,4.8,137,Dealer,Tulley Automotive Group,4.7,...,Ebony,All-wheel Drive,15,22,Gasoline,6-Speed Automatic,3.6L V6 24V GDI DOHC,5GAKVBKD4HJ190334,B221381B,62649
9377,2019,Subaru,Forester Premium,Used,28568.0,4.7,279,Dealer,Ira Subaru,4.4,...,Black,All-wheel Drive,26,33,Gasoline,Automatic CVT,2.5L H4 16V GDI DOHC,JF2SKAGC9KH423450,KH423450,30760


In [83]:
car_csv["ConsumerRating"].transform("sqrt")

0       2.144761
1       2.190890
2       2.167948
3       2.236068
4       2.190890
          ...   
9374    2.167948
9375    2.190890
9376    2.190890
9377    2.167948
9378    2.190890
Name: ConsumerRating, Length: 9379, dtype: float64

In [84]:
car_csv["ConsumerRating"].transform([np.sqrt, np.exp])

Unnamed: 0,sqrt,exp
0,2.144761,99.484316
1,2.190890,121.510418
2,2.167948,109.947172
3,2.236068,148.413159
4,2.190890,121.510418
...,...,...
9374,2.167948,109.947172
9375,2.190890,121.510418
9376,2.190890,121.510418
9377,2.167948,109.947172


In [85]:
car_csv.transform({
    "Price": f,
    "ConsumerRating": np.exp
})

Unnamed: 0,Price,ConsumerRating
0,39998.0,99.484316
1,49985.0,121.510418
2,41860.0,109.947172
3,28500.0,148.413159
4,49000.0,121.510418
...,...,...
9374,27374.0,109.947172
9375,61998.0,121.510418
9376,26944.0,121.510418
9377,28568.0,109.947172


-----
# Grouping

## Single Group

In [86]:
companies_uniques = car_csv.Make.unique()
print(companies_uniques) 
print("Total:", companies_uniques.size)

['Toyota' 'Ford' 'RAM' 'Honda' 'Lexus' 'Mercedes-Benz' 'Dodge' 'Subaru'
 'Acura' 'BMW' 'Audi' 'Volvo' 'Lincoln' 'Land' 'Chevrolet' 'INFINITI'
 'Tesla' 'Jeep' 'Chrysler' 'Mazda' 'Kia' 'Volkswagen' 'Porsche' 'Nissan'
 'Hyundai' 'GMC' 'Buick' 'Genesis' 'Cadillac' 'Alfa' 'FIAT' 'Jaguar'
 'MINI' 'Lamborghini' 'Maserati' 'Mitsubishi' 'Bentley' 'Mercury' 'Scion'
 'Saturn' 'Ferrari']
Total: 41


In [87]:
groups = car_csv.groupby("Make")
display(groups)

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x0000024FE9B48100>

In [88]:
groups.ngroups,

(41,)

In [89]:
groups.get_group(companies_uniques[0])

Unnamed: 0,Year,Make,Model,Used/New,Price,ConsumerRating,ConsumerReviews,SellerType,SellerName,SellerRating,...,InteriorColor,Drivetrain,MinMPG,MaxMPG,FuelType,Transmission,Engine,VIN,Stock#,Mileage
0,2019,Toyota,Sienna SE,Used,39998.0,4.6,45,Dealer,CarMax Murrieta - Now offering Curbside Pickup...,3.3,...,Black,Front-wheel Drive,19,27,Gasoline,8-Speed Automatic,3.5L V6 24V PDI DOHC,5TDXZ3DC2KS015402,22998646,29403
5,2012,Toyota,4Runner SR5,Used,23541.0,4.7,34,Dealer,AutoNation Toyota Hayward,4.4,...,–,Rear-wheel Drive,17,23,Gasoline,5-Speed Automatic,4.0L V6 24V MPFI DOHC,JTEZU5JR3C5043790,C5043790,105469
91,2021,Toyota,Camry SE,Used,31998.0,4.8,69,Dealer,CarMax Pleasanton - Now offering Curbside Pick...,5.0,...,Black,Front-wheel Drive,28,39,Gasoline,8-Speed Automatic,2.5L I4 16V PDI DOHC,4T1G11AK8MU410766,22549172,27521
99,2021,Toyota,4Runner Limited,Toyota Certified,51931.0,4.9,42,Dealer,Stevinson Toyota West,4.9,...,Redwood,Four-wheel Drive,16,19,Gasoline,5-Speed Automatic,4.0L V6 24V MPFI DOHC,JTEKU5JR7M5854429,S18938,20606
110,2019,Toyota,Highlander SE,Used,39368.0,4.6,363,Dealer,Heartland Chrysler Dodge Jeep Ram - Arkansas,1.0,...,Almond,Front-wheel Drive,21,27,Gasoline,8-Speed Automatic,3.5L V6 24V PDI DOHC,5TDKZRFH5KS564775,UH564775,27027
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9300,2020,Toyota,Tundra SR5,Used,47699.0,4.8,66,Dealer,South Pointe Honda,4.7,...,Graphite,Four-wheel Drive,13,17,Gasoline,6-Speed Automatic,5.7L V8 32V MPFI DOHC,5TFDY5F1XLX919149,LX919149,21838
9310,2018,Toyota,RAV4 LE,Used,25988.0,4.8,770,Dealer,Bud Weiser Motors,4.8,...,–,Front-wheel Drive,23,30,Gasoline,6-Speed Automatic,2.5L I4 16V MPFI DOHC,2T3ZFREV8JW521138,50194B,41504
9327,2021,Toyota,Tundra Limited,Used,58980.0,4.9,32,Dealer,Nalley Toyota Stonecrest,4.5,...,Graphite,Four-wheel Drive,13,17,Gasoline,6-Speed Automatic,5.7L V8 32V MPFI DOHC,5TFHY5F1XMX033609,NTSP210566,15075
9332,2020,Toyota,Tacoma TRD Off Road,Used,42406.0,4.7,123,Dealer,Fred Anderson Toyota of Asheville,4.0,...,Cement,Four-wheel Drive,18,22,Gasoline,6-Speed Automatic,3.5L V6 24V PDI DOHC,3TMCZ5AN8LM303944,LM303944P,45728


In [90]:
groups["ConsumerRating"].mean()

Make
Acura            4.681818
Alfa             4.747368
Audi             4.758491
BMW              4.754237
Bentley          4.583333
Buick            4.714035
Cadillac         4.584615
Chevrolet        4.630769
Chrysler         4.746154
Dodge            4.774483
FIAT             4.300000
Ferrari          5.000000
Ford             4.722241
GMC              4.637600
Genesis          4.836842
Honda            4.773212
Hyundai          4.741509
INFINITI         4.670833
Jaguar           4.756667
Jeep             4.739798
Kia              4.765417
Lamborghini      4.771429
Land             4.209770
Lexus            4.762397
Lincoln          4.620833
MINI             4.740000
Maserati         4.726667
Mazda            4.813171
Mercedes-Benz    4.690247
Mercury          4.300000
Mitsubishi       4.775000
Nissan           4.727888
Porsche          4.779851
RAM              4.777931
Saturn           4.500000
Scion            4.425000
Subaru           4.709355
Tesla            4.073656
Toyota 

In [91]:
groups.mean()

Unnamed: 0_level_0,Year,Price,ConsumerRating,ConsumerReviews,SellerRating,SellerReviews,ComfortRating,InteriorDesignRating,PerformanceRating,ValueForMoneyRating,ExteriorStylingRating,ReliabilityRating,MinMPG,MaxMPG,Mileage
Make,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Acura,2018.571429,35599.082251,4.681818,79.207792,4.42684,793.675325,4.770996,4.690909,4.642424,4.562338,4.724242,4.705195,20.554113,28.147186,38335.896104
Alfa,2019.078947,34577.5,4.747368,33.921053,4.45,846.947368,4.713158,4.668421,4.884211,4.657895,4.897368,4.597368,22.605263,29.736842,28599.631579
Audi,2018.735849,46193.441038,4.758491,39.096698,4.452358,804.313679,4.819811,4.753066,4.773349,4.529717,4.804009,4.740802,20.945755,27.261792,35421.521226
BMW,2018.560381,44383.168611,4.754237,44.556144,4.347669,955.024364,4.834428,4.799682,4.842267,4.471398,4.804661,4.689513,21.254237,28.592161,36422.061441
Bentley,2014.833333,146911.833333,4.583333,2.666667,4.466667,579.666667,4.5,4.666667,4.833333,4.1,4.9,4.4,12.333333,20.666667,23595.833333
Buick,2018.464912,27255.438596,4.714035,115.140351,4.399123,859.333333,4.763158,4.689474,4.637719,4.6,4.800877,4.713158,21.008772,27.385965,34061.763158
Cadillac,2018.698997,41564.702341,4.584615,75.280936,4.502676,841.745819,4.727759,4.698662,4.542475,4.357525,4.746154,4.47592,18.655518,25.628763,36595.993311
Chevrolet,2018.540865,36703.641827,4.630769,153.84375,4.386298,880.923077,4.686538,4.649038,4.604327,4.491106,4.746875,4.616587,19.5,26.300481,41834.552885
Chrysler,2017.564103,26221.923077,4.746154,76.589744,4.161538,708.282051,4.794872,4.738462,4.712821,4.64359,4.817949,4.705128,18.692308,28.076923,60961.230769
Dodge,2019.137931,38196.931034,4.774483,79.827586,4.284138,964.068966,4.849655,4.758621,4.772414,4.613103,4.852414,4.767586,16.627586,24.510345,34514.131034


## Multiple Groups

In [92]:
toyota_group = groups.get_group("Toyota") 
len(toyota_group), len(toyota_group.Model.unique())

(797, 86)

In [93]:
groups = car_csv.groupby(["Make", "Model"])

In [94]:
keys = list(groups.groups.keys())
display(keys)

[('Acura', 'ILX'),
 ('Acura', 'ILX Premium & A-SPEC Packages'),
 ('Acura', 'ILX Premium Package'),
 ('Acura', 'ILX w/AcuraWatch Plus'),
 ('Acura', 'ILX w/Premium Pkg'),
 ('Acura', 'MDX 3.5L'),
 ('Acura', 'MDX 3.5L AcuraWatch Plus Package'),
 ('Acura', 'MDX 3.5L Technology & A-Spec Pkgs'),
 ('Acura', 'MDX 3.5L Technology Package'),
 ('Acura', 'MDX 3.5L w/Advance Package'),
 ('Acura', 'MDX 3.5L w/Technology Package'),
 ('Acura', 'MDX 3.7L Advance'),
 ('Acura', 'MDX A-Spec'),
 ('Acura', 'MDX Advance'),
 ('Acura', 'MDX Base'),
 ('Acura', 'MDX SH-AWD 7-Passenger'),
 ('Acura', 'MDX Sport Hybrid 3.0L w/Technology Package'),
 ('Acura', 'MDX Touring'),
 ('Acura', 'RDX'),
 ('Acura', 'RDX A-Spec'),
 ('Acura', 'RDX AcuraWatch Plus Package'),
 ('Acura', 'RDX AcuraWatch Plus Pkg'),
 ('Acura', 'RDX Advance Package'),
 ('Acura', 'RDX Base'),
 ('Acura', 'RDX Base (A10)'),
 ('Acura', 'RDX Technology'),
 ('Acura', 'RDX Technology Package'),
 ('Acura', 'RDX w/A-Spec Package'),
 ('Acura', 'RDX w/Technology

In [95]:
groups.get_group(('BMW', 'X5 M50i'))

Unnamed: 0,Year,Make,Model,Used/New,Price,ConsumerRating,ConsumerReviews,SellerType,SellerName,SellerRating,...,InteriorColor,Drivetrain,MinMPG,MaxMPG,FuelType,Transmission,Engine,VIN,Stock#,Mileage
728,2020,BMW,X5 M50i,Used,66450.0,4.7,53,Dealer,26 Motors Bronx,3.7,...,Brown,All-wheel Drive,16,22,Gasoline,8-Speed Automatic,4.4L V8 32V GDI DOHC Twin Turbo,5UXJU4C03L9C17975,C17975,25578
2120,2020,BMW,X5 M50i,Used,79900.0,4.7,53,Dealer,Nalley BMW,4.1,...,Black,All-wheel Drive,16,22,Gasoline,8-Speed Automatic,4.4L V8 32V GDI DOHC Twin Turbo,5UXJU4C01L9B96284,NBD220937A,23649
3763,2020,BMW,X5 M50i,Used,78595.0,4.7,53,Dealer,Mercedes-Benz of Massapequa,4.8,...,Ivory / Blue,All-wheel Drive,16,22,Gasoline,8-Speed Automatic,4.4L V8 32V GDI DOHC Twin Turbo,5UXJU4C05L9C83069,L9C83069P,16615


In [96]:
groups_mean = groups.mean()
groups_mean

Unnamed: 0_level_0,Unnamed: 1_level_0,Year,Price,ConsumerRating,ConsumerReviews,SellerRating,SellerReviews,ComfortRating,InteriorDesignRating,PerformanceRating,ValueForMoneyRating,ExteriorStylingRating,ReliabilityRating,MinMPG,MaxMPG,Mileage
Make,Model,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Acura,ILX,2020.000000,30998.000000,4.500000,16.0,3.000000,5.000000,4.500000,4.600000,4.500000,4.400000,4.700000,4.400000,24.000000,34.000000,23203.000000
Acura,ILX Premium & A-SPEC Packages,2018.333333,25151.666667,4.533333,28.0,4.333333,635.666667,4.533333,4.500000,4.466667,4.466667,4.666667,4.566667,24.333333,34.666667,41077.000000
Acura,ILX Premium Package,2021.000000,33059.500000,4.900000,3.0,4.700000,532.000000,4.700000,4.700000,5.000000,5.000000,4.700000,5.000000,24.000000,34.000000,12202.000000
Acura,ILX w/AcuraWatch Plus,2017.000000,6950.000000,4.700000,38.0,3.500000,63.000000,4.700000,4.700000,4.600000,4.700000,4.800000,4.800000,25.000000,35.000000,54640.000000
Acura,ILX w/Premium Pkg,2019.000000,30995.000000,4.700000,24.0,4.500000,272.000000,4.700000,4.600000,4.600000,4.600000,4.800000,4.600000,24.000000,34.000000,11872.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Volvo,XC90 T6 Inscription 6 Passenger,2020.000000,54544.500000,4.700000,35.0,4.650000,491.000000,4.900000,4.800000,4.600000,4.500000,4.900000,4.700000,18.000000,26.000000,25194.500000
Volvo,XC90 T6 Inscription 7 Passenger,2020.000000,56996.000000,4.700000,35.0,3.300000,7.000000,4.900000,4.800000,4.600000,4.500000,4.900000,4.700000,18.000000,26.000000,22296.000000
Volvo,XC90 T6 Momentum,2018.733333,42766.133333,4.693333,65.8,4.180000,601.066667,4.893333,4.733333,4.760000,4.573333,4.813333,4.560000,19.133333,26.000000,41463.333333
Volvo,XC90 T6 Momentum 7 Passenger,2020.571429,52570.714286,4.871429,27.0,4.071429,233.571429,4.957143,4.914286,4.828571,4.785714,4.957143,4.871429,18.571429,27.142857,21398.571429


In [97]:
groups_mean.loc["Toyota"]

Unnamed: 0_level_0,Year,Price,ConsumerRating,ConsumerReviews,SellerRating,SellerReviews,ComfortRating,InteriorDesignRating,PerformanceRating,ValueForMoneyRating,ExteriorStylingRating,ReliabilityRating,MinMPG,MaxMPG,Mileage
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
4Runner Limited,2018.086957,42315.797101,4.801449,147.927536,4.550725,1498.000000,4.818841,4.707246,4.727536,4.717391,4.866667,4.898551,16.753623,20.014493,48403.681159
4Runner Limited 4WD,2018.000000,41571.000000,4.800000,253.000000,4.900000,826.000000,4.800000,4.700000,4.700000,4.700000,4.900000,4.900000,17.000000,20.000000,55920.000000
4Runner Limited Nightshade,2019.000000,45241.000000,4.800000,189.000000,4.700000,311.000000,4.800000,4.700000,4.800000,4.700000,4.800000,4.900000,17.000000,20.000000,40817.000000
4Runner Nightshade,2021.000000,53439.000000,4.900000,42.000000,4.425000,1291.000000,5.000000,4.900000,4.800000,4.900000,4.900000,4.900000,16.000000,19.000000,17851.500000
4Runner SR5,2017.411765,34848.705882,4.770588,123.764706,4.323529,1002.705882,4.782353,4.676471,4.676471,4.670588,4.823529,4.894118,16.705882,20.470588,69776.764706
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Tundra SR,2021.000000,38792.000000,4.900000,32.000000,4.600000,43.000000,4.900000,4.800000,4.800000,4.800000,4.900000,4.900000,13.000000,17.000000,21893.000000
Tundra SR5,2019.571429,45693.928571,4.823810,80.190476,4.364286,747.952381,4.852381,4.680952,4.792857,4.685714,4.850000,4.897619,13.095238,17.166667,30718.095238
Tundra W/ NAVIGATION&PLATINUM,2019.000000,49000.000000,4.800000,126.000000,4.800000,2791.000000,4.900000,4.700000,4.900000,4.700000,4.900000,4.900000,13.000000,17.000000,33344.000000
Venza Limited,2021.000000,40396.000000,4.700000,70.000000,3.500000,13.000000,4.700000,4.700000,4.700000,4.500000,4.900000,4.700000,40.000000,37.000000,16728.000000


In [98]:
groups_mean.loc["Toyota", "4Runner Limited"]

Year                      2018.086957
Price                    42315.797101
ConsumerRating               4.801449
ConsumerReviews            147.927536
SellerRating                 4.550725
SellerReviews             1498.000000
ComfortRating                4.818841
InteriorDesignRating         4.707246
PerformanceRating            4.727536
ValueForMoneyRating          4.717391
ExteriorStylingRating        4.866667
ReliabilityRating            4.898551
MinMPG                      16.753623
MaxMPG                      20.014493
Mileage                  48403.681159
Name: (Toyota, 4Runner Limited), dtype: float64

## Multiple Calculation On Group Columns

In [99]:
groups.agg(["min", "max", "mean", "var"])

  groups.agg(["min", "max", "mean", "var"])


Unnamed: 0_level_0,Unnamed: 1_level_0,Year,Year,Year,Year,Price,Price,Price,Price,ConsumerRating,ConsumerRating,...,MinMPG,MinMPG,MaxMPG,MaxMPG,MaxMPG,MaxMPG,Mileage,Mileage,Mileage,Mileage
Unnamed: 0_level_1,Unnamed: 1_level_1,min,max,mean,var,min,max,mean,var,min,max,...,mean,var,min,max,mean,var,min,max,mean,var
Make,Model,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
Acura,ILX,2020,2020,2020.000000,,30998.0,30998.0,30998.000000,,4.5,4.5,...,24.000000,,34,34,34.000000,,23203,23203,23203.000000,
Acura,ILX Premium & A-SPEC Packages,2016,2020,2018.333333,4.333333,20575.0,30885.0,25151.666667,2.757743e+07,4.4,4.7,...,24.333333,0.333333,34,36,34.666667,1.333333,14940,79206,41077.000000,1.140386e+09
Acura,ILX Premium Package,2021,2021,2021.000000,0.000000,31120.0,34999.0,33059.500000,7.523320e+06,4.9,4.9,...,24.000000,0.000000,34,34,34.000000,0.000000,10750,13654,12202.000000,4.216608e+06
Acura,ILX w/AcuraWatch Plus,2017,2017,2017.000000,,6950.0,6950.0,6950.000000,,4.7,4.7,...,25.000000,,35,35,35.000000,,54640,54640,54640.000000,
Acura,ILX w/Premium Pkg,2019,2019,2019.000000,,30995.0,30995.0,30995.000000,,4.7,4.7,...,24.000000,,34,34,34.000000,,11872,11872,11872.000000,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Volvo,XC90 T6 Inscription 6 Passenger,2020,2020,2020.000000,0.000000,52589.0,56500.0,54544.500000,7.647960e+06,4.7,4.7,...,18.000000,0.000000,26,26,26.000000,0.000000,13752,36637,25194.500000,2.618616e+08
Volvo,XC90 T6 Inscription 7 Passenger,2020,2020,2020.000000,,56996.0,56996.0,56996.000000,,4.7,4.7,...,18.000000,,26,26,26.000000,,22296,22296,22296.000000,
Volvo,XC90 T6 Momentum,2016,2021,2018.733333,1.495238,26923.0,54900.0,42766.133333,3.756396e+07,4.5,5.0,...,19.133333,0.123810,25,28,26.000000,0.428571,11388,88263,41463.333333,4.096078e+08
Volvo,XC90 T6 Momentum 7 Passenger,2020,2021,2020.571429,0.285714,43419.0,57990.0,52570.714286,2.608671e+07,4.7,5.0,...,18.571429,0.285714,26,28,27.142857,1.142857,9224,48957,21398.571429,1.697384e+08


In [100]:
def diff(x: pd.Series) -> pd.Series:
#     print(x.name, x.values, x.index)
#     print("-----------------------")
    return x.max() - x.min()

In [101]:
groups.agg(["min", "max", "mean", "var", diff])

  groups.agg(["min", "max", "mean", "var", diff])


Unnamed: 0_level_0,Unnamed: 1_level_0,Year,Year,Year,Year,Year,Price,Price,Price,Price,Price,...,MaxMPG,MaxMPG,MaxMPG,MaxMPG,MaxMPG,Mileage,Mileage,Mileage,Mileage,Mileage
Unnamed: 0_level_1,Unnamed: 1_level_1,min,max,mean,var,diff,min,max,mean,var,diff,...,min,max,mean,var,diff,min,max,mean,var,diff
Make,Model,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
Acura,ILX,2020,2020,2020.000000,,0,30998.0,30998.0,30998.000000,,0.0,...,34,34,34.000000,,0,23203,23203,23203.000000,,0
Acura,ILX Premium & A-SPEC Packages,2016,2020,2018.333333,4.333333,4,20575.0,30885.0,25151.666667,2.757743e+07,10310.0,...,34,36,34.666667,1.333333,2,14940,79206,41077.000000,1.140386e+09,64266
Acura,ILX Premium Package,2021,2021,2021.000000,0.000000,0,31120.0,34999.0,33059.500000,7.523320e+06,3879.0,...,34,34,34.000000,0.000000,0,10750,13654,12202.000000,4.216608e+06,2904
Acura,ILX w/AcuraWatch Plus,2017,2017,2017.000000,,0,6950.0,6950.0,6950.000000,,0.0,...,35,35,35.000000,,0,54640,54640,54640.000000,,0
Acura,ILX w/Premium Pkg,2019,2019,2019.000000,,0,30995.0,30995.0,30995.000000,,0.0,...,34,34,34.000000,,0,11872,11872,11872.000000,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Volvo,XC90 T6 Inscription 6 Passenger,2020,2020,2020.000000,0.000000,0,52589.0,56500.0,54544.500000,7.647960e+06,3911.0,...,26,26,26.000000,0.000000,0,13752,36637,25194.500000,2.618616e+08,22885
Volvo,XC90 T6 Inscription 7 Passenger,2020,2020,2020.000000,,0,56996.0,56996.0,56996.000000,,0.0,...,26,26,26.000000,,0,22296,22296,22296.000000,,0
Volvo,XC90 T6 Momentum,2016,2021,2018.733333,1.495238,5,26923.0,54900.0,42766.133333,3.756396e+07,27977.0,...,25,28,26.000000,0.428571,3,11388,88263,41463.333333,4.096078e+08,76875
Volvo,XC90 T6 Momentum 7 Passenger,2020,2021,2020.571429,0.285714,1,43419.0,57990.0,52570.714286,2.608671e+07,14571.0,...,26,28,27.142857,1.142857,2,9224,48957,21398.571429,1.697384e+08,39733


# Applying

In [102]:
car_csv.Price.apply(f)

0       39998.0
1       49985.0
2       41860.0
3       28500.0
4       49000.0
         ...   
9374    27374.0
9375    61998.0
9376    26944.0
9377    28568.0
9378    32091.0
Name: Price, Length: 9379, dtype: float64

In [103]:
car_csv.apply([np.mean, np.max])

  car_csv.apply([np.mean, np.max])


Unnamed: 0,Year,Make,Model,Used/New,Price,ConsumerRating,ConsumerReviews,SellerType,SellerName,SellerRating,...,InteriorColor,Drivetrain,MinMPG,MaxMPG,FuelType,Transmission,Engine,VIN,Stock#,Mileage
mean,2018.721719,,,,39834.349371,4.702825,133.187014,,,4.412571,...,,,22.755411,29.216548,,,,,,37463.02335
amax,2022.0,Volvo,xB Base,Volvo Certified,449996.0,5.0,817.0,Private,xDrive Motors Inc.,5.0,...,–,–,150.0,133.0,–,–,–,ZPBUA1ZL9LLA10152,–,234114.0


In [104]:
def f(x):
    return x.ConsumerRating - x.SellerRating

make_model_group = car_csv.groupby(["Make", "Model"]).apply(f)
for i, idx in enumerate(make_model_group.index):
    display(make_model_group)
    if i == 5:
        break


Make   Model                              
Acura  ILX                            6490    1.5
       ILX Premium & A-SPEC Packages  54      1.0
                                      262    -0.3
                                      1720   -0.1
       ILX Premium Package            2777    0.2
                                             ... 
Volvo  XC90 T6 R-Design               2436    1.3
                                      2811    0.5
                                      4885    1.3
                                      6077    1.3
                                      6270    0.5
Length: 9379, dtype: float64

Make   Model                              
Acura  ILX                            6490    1.5
       ILX Premium & A-SPEC Packages  54      1.0
                                      262    -0.3
                                      1720   -0.1
       ILX Premium Package            2777    0.2
                                             ... 
Volvo  XC90 T6 R-Design               2436    1.3
                                      2811    0.5
                                      4885    1.3
                                      6077    1.3
                                      6270    0.5
Length: 9379, dtype: float64

Make   Model                              
Acura  ILX                            6490    1.5
       ILX Premium & A-SPEC Packages  54      1.0
                                      262    -0.3
                                      1720   -0.1
       ILX Premium Package            2777    0.2
                                             ... 
Volvo  XC90 T6 R-Design               2436    1.3
                                      2811    0.5
                                      4885    1.3
                                      6077    1.3
                                      6270    0.5
Length: 9379, dtype: float64

Make   Model                              
Acura  ILX                            6490    1.5
       ILX Premium & A-SPEC Packages  54      1.0
                                      262    -0.3
                                      1720   -0.1
       ILX Premium Package            2777    0.2
                                             ... 
Volvo  XC90 T6 R-Design               2436    1.3
                                      2811    0.5
                                      4885    1.3
                                      6077    1.3
                                      6270    0.5
Length: 9379, dtype: float64

Make   Model                              
Acura  ILX                            6490    1.5
       ILX Premium & A-SPEC Packages  54      1.0
                                      262    -0.3
                                      1720   -0.1
       ILX Premium Package            2777    0.2
                                             ... 
Volvo  XC90 T6 R-Design               2436    1.3
                                      2811    0.5
                                      4885    1.3
                                      6077    1.3
                                      6270    0.5
Length: 9379, dtype: float64

Make   Model                              
Acura  ILX                            6490    1.5
       ILX Premium & A-SPEC Packages  54      1.0
                                      262    -0.3
                                      1720   -0.1
       ILX Premium Package            2777    0.2
                                             ... 
Volvo  XC90 T6 R-Design               2436    1.3
                                      2811    0.5
                                      4885    1.3
                                      6077    1.3
                                      6270    0.5
Length: 9379, dtype: float64

# What Are Diffrence Between `apply`, `transform` ?



+ transform can not aggregate
+ apply can aggregate (mean, max, etc)


+ apply works with multiple series at time
+ transfrom works with one series at time

# What Are Diffrence Between `apply`, `agg` ?

apply applies the function to each group (your Species). Your function returns 1, so you end up with 1 value for each of 3 groups.

agg aggregates each column (feature) for each group, so you end up with one value per column per group.

# Filtering

In [105]:
car_csv.ConsumerRating

0       4.6
1       4.8
2       4.7
3       5.0
4       4.8
       ... 
9374    4.7
9375    4.8
9376    4.8
9377    4.7
9378    4.8
Name: ConsumerRating, Length: 9379, dtype: float64

In [106]:
groups = car_csv.groupby(["Make", "Model"])
groups.ngroups

1304

In [107]:
filtered_groups = groups \
    .filter(lambda x: x.ConsumerRating.mean() > 4.98) \
    .groupby(["Make", "Model"]) \
    .apply(lambda x: x.ConsumerRating.mean()) \
    

filtered_groups

Make           Model                                 
Audi           A4 40 Premium                             5.0
               A4 40 Premium Plus                        5.0
               A4 45 S line quattro Premium              5.0
               Q3 45 S line Premium                      5.0
               Q5 S line Premium                         5.0
               Q7 55 quattro Premium Plus                5.0
BMW            228 Gran Coupe 228i sDrive Gran Coupe     5.0
               228 Gran Coupe i xDrive                   5.0
               750 i                                     5.0
               M5 Competition                            5.0
               M6 Base                                   5.0
               M850 i xDrive                             5.0
               Z4 sDrive30i                              5.0
Bentley        Bentayga Design Edition                   5.0
Cadillac       Escalade ESV Luxury                       5.0
               Escalade ESV Pre

# Combining DataFrames

## Append

In [108]:
df1 = pd.DataFrame(
    data    = np.arange(1200).reshape((400, 3)),
    columns = ["COL_1", "COL_2", "COL_3"],
    index   = [x + 1 for x in range(400)]
)

df2 = pd.DataFrame(
    data    = (np.arange(1200) * 0.5).reshape((400, 3)),
    columns = ["COL_1", "COL_2", "COL_3"],
    index   = [x + 1 for x in range(400)]
)

### Vertically

In [109]:
pd.concat([df1, df2])

Unnamed: 0,COL_1,COL_2,COL_3
1,0.0,1.0,2.0
2,3.0,4.0,5.0
3,6.0,7.0,8.0
4,9.0,10.0,11.0
5,12.0,13.0,14.0
...,...,...,...
396,592.5,593.0,593.5
397,594.0,594.5,595.0
398,595.5,596.0,596.5
399,597.0,597.5,598.0


### Horizontally

In [110]:
pd.concat([df1, df2], axis=1)

Unnamed: 0,COL_1,COL_2,COL_3,COL_1.1,COL_2.1,COL_3.1
1,0,1,2,0.0,0.5,1.0
2,3,4,5,1.5,2.0,2.5
3,6,7,8,3.0,3.5,4.0
4,9,10,11,4.5,5.0,5.5
5,12,13,14,6.0,6.5,7.0
...,...,...,...,...,...,...
396,1185,1186,1187,592.5,593.0,593.5
397,1188,1189,1190,594.0,594.5,595.0
398,1191,1192,1193,595.5,596.0,596.5
399,1194,1195,1196,597.0,597.5,598.0


# Merge

In [111]:
movie_cols = ['movie_id', 'title']
movies = pd.read_table('ml-100k/u.item', sep='|', header=None, names=movie_cols, usecols=[0, 1])
movies.head()

Unnamed: 0,movie_id,title
0,1,Toy Story (1995)
1,2,GoldenEye (1995)
2,3,Four Rooms (1995)
3,4,Get Shorty (1995)
4,5,Copycat (1995)


In [112]:
rating_cols = ['user_id', 'movie_id', 'rating', 'timestamp']
ratings = pd.read_table('ml-100k/u.data', sep='\t', header=None, names=rating_cols)
ratings.head()

Unnamed: 0,user_id,movie_id,rating,timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


In [113]:
movie_ratings = pd.merge(movies, ratings)
movie_ratings

Unnamed: 0,movie_id,title,user_id,rating,timestamp
0,1,Toy Story (1995),308,4,887736532
1,1,Toy Story (1995),287,5,875334088
2,1,Toy Story (1995),148,4,877019411
3,1,Toy Story (1995),280,4,891700426
4,1,Toy Story (1995),66,3,883601324
...,...,...,...,...,...
99995,1678,Mat' i syn (1997),863,1,889289570
99996,1679,B. Monkey (1998),863,3,889289491
99997,1680,Sliding Doors (1998),863,2,889289570
99998,1681,You So Crazy (1994),896,3,887160722


## Diffrent Names

In [114]:
movies.columns = ['m_id', 'title']
movies

Unnamed: 0,m_id,title
0,1,Toy Story (1995)
1,2,GoldenEye (1995)
2,3,Four Rooms (1995)
3,4,Get Shorty (1995)
4,5,Copycat (1995)
...,...,...
1677,1678,Mat' i syn (1997)
1678,1679,B. Monkey (1998)
1679,1680,Sliding Doors (1998)
1680,1681,You So Crazy (1994)


In [115]:
pd.merge(movies, ratings, left_on='m_id', right_on='movie_id').head()


Unnamed: 0,m_id,title,user_id,movie_id,rating,timestamp
0,1,Toy Story (1995),308,1,4,887736532
1,1,Toy Story (1995),287,1,5,875334088
2,1,Toy Story (1995),148,1,4,877019411
3,1,Toy Story (1995),280,1,4,891700426
4,1,Toy Story (1995),66,1,3,883601324


## Join on Index

### One Index

In [116]:
movies = movies.set_index('m_id')
movies

Unnamed: 0_level_0,title
m_id,Unnamed: 1_level_1
1,Toy Story (1995)
2,GoldenEye (1995)
3,Four Rooms (1995)
4,Get Shorty (1995)
5,Copycat (1995)
...,...
1678,Mat' i syn (1997)
1679,B. Monkey (1998)
1680,Sliding Doors (1998)
1681,You So Crazy (1994)


In [117]:
pd.merge(movies, ratings, left_index=True, right_on='movie_id')

Unnamed: 0,title,user_id,movie_id,rating,timestamp
24,Toy Story (1995),308,1,4,887736532
454,Toy Story (1995),287,1,5,875334088
957,Toy Story (1995),148,1,4,877019411
971,Toy Story (1995),280,1,4,891700426
1324,Toy Story (1995),66,1,3,883601324
...,...,...,...,...,...
75323,Mat' i syn (1997),863,1678,1,889289570
67302,B. Monkey (1998),863,1679,3,889289491
80394,Sliding Doors (1998),863,1680,2,889289570
92329,You So Crazy (1994),896,1681,3,887160722


### Two Index

In [118]:
ratings = ratings.set_index('movie_id')
ratings

Unnamed: 0_level_0,user_id,rating,timestamp
movie_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
242,196,3,881250949
302,186,3,891717742
377,22,1,878887116
51,244,2,880606923
346,166,1,886397596
...,...,...,...
476,880,3,880175444
204,716,5,879795543
1090,276,1,874795795
225,13,2,882399156


In [119]:
pd.merge(movies, ratings, left_index=True, right_index=True)

Unnamed: 0,title,user_id,rating,timestamp
1,Toy Story (1995),308,4,887736532
1,Toy Story (1995),287,5,875334088
1,Toy Story (1995),148,4,877019411
1,Toy Story (1995),280,4,891700426
1,Toy Story (1995),66,3,883601324
...,...,...,...,...
1678,Mat' i syn (1997),863,1,889289570
1679,B. Monkey (1998),863,3,889289491
1680,Sliding Doors (1998),863,2,889289570
1681,You So Crazy (1994),896,3,887160722


## Diffrent Joins

In [120]:
A = pd.DataFrame({'color': ['green', 'yellow', 'red'], 'num':[1, 2, 3]})
B = pd.DataFrame({'color': ['green', 'yellow', 'pink'], 'size':['S', 'M', 'L']})
display(A, B)

Unnamed: 0,color,num
0,green,1
1,yellow,2
2,red,3


Unnamed: 0,color,size
0,green,S
1,yellow,M
2,pink,L


In [121]:
pd.merge(A, B) # Inner By Default

Unnamed: 0,color,num,size
0,green,1,S
1,yellow,2,M


In [122]:
pd.merge(A, B, how='outer')

Unnamed: 0,color,num,size
0,green,1.0,S
1,yellow,2.0,M
2,red,3.0,
3,pink,,L


In [123]:
pd.merge(A, B, how='left')


Unnamed: 0,color,num,size
0,green,1,S
1,yellow,2,M
2,red,3,


In [124]:
pd.merge(A, B, how='right')


Unnamed: 0,color,num,size
0,green,1.0,S
1,yellow,2.0,M
2,pink,,L
