In [2]:
import pandas as pd
import numpy as np

In [3]:
# Small hand-written table of four items, each with a quantity and a price.
# NOTE(review): 'Baana' looks like a typo for 'Banana' — confirm, then fix it
# here and re-run so the recorded outputs stay in step with the code.
df = pd.DataFrame(
    data={
        'Item': ['Apple', 'Orange', 'Baana', 'Watermelon'],
        'Quantity': [12, 10, 100, 3],
        'Price': [1, 2, 0.5, 9],
    },
)
df

Unnamed: 0,Item,Quantity,Price
0,Apple,12,1.0
1,Orange,10,2.0
2,Baana,100,0.5
3,Watermelon,3,9.0


In [4]:
# Print a structural summary: index range, per-column non-null counts and dtypes, memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Item      4 non-null      object 
 1   Quantity  4 non-null      int64  
 2   Price     4 non-null      float64
dtypes: float64(1), int64(1), object(1)
memory usage: 228.0+ bytes


In [6]:
# Column labels, returned as an Index.
df.columns

Index(['Item', 'Quantity', 'Price'], dtype='object')

In [7]:
# Row labels; no index was passed at construction, so this is the default RangeIndex.
df.index

RangeIndex(start=0, stop=4, step=1)

In [8]:
# Total number of cells in the frame (rows x columns).
df.size

12

In [9]:
# Dimensions as a (rows, columns) tuple.
df.shape

(4, 3)

In [10]:
# Dtype of every column, as a Series indexed by column name.
df.dtypes

Item         object
Quantity      int64
Price       float64
dtype: object

In [12]:
# Selecting a single column by label returns a Series.
df['Item']

0         Apple
1        Orange
2         Baana
3    Watermelon
Name: Item, dtype: object

In [13]:
# The Price column as a Series (float64, because one of the prices is fractional).
df['Price']

0    1.0
1    2.0
2    0.5
3    9.0
Name: Price, dtype: float64

In [14]:
# The Quantity column as a Series of integers.
df['Quantity']

0     12
1     10
2    100
3      3
Name: Quantity, dtype: int64

In [15]:
# Read one cell with a single (row label, column label) lookup rather than
# chaining df['Item'][0], which extracts the whole column before indexing it.
df.loc[0, 'Item']

'Apple'

In [16]:
# Read one cell with a single (row label, column label) lookup rather than
# chaining df['Price'][3], which extracts the whole column before indexing it.
df.loc[3, 'Price']

9.0

In [18]:
# Look up the row whose index label is 0; it comes back as a Series keyed by column name.
df.loc[0]

Item        Apple
Quantity       12
Price         1.0
Name: 0, dtype: object

In [19]:
# Look up the row whose index label is 2; it comes back as a Series keyed by column name.
df.loc[2]

Item        Baana
Quantity      100
Price         0.5
Name: 2, dtype: object

In [20]:
# Element-wise comparison: a boolean Series that is True where Price exceeds 5.
df['Price'].gt(5)

0    False
1    False
2    False
3     True
Name: Price, dtype: bool

In [21]:
# Boolean-mask selection: keep only the rows whose Price exceeds 5.
df.loc[df['Price'] > 5]

Unnamed: 0,Item,Quantity,Price
3,Watermelon,3,9.0


In [22]:
# Element-wise comparison: a boolean Series that is True where Quantity is below 20.
df['Quantity'].lt(20)

0     True
1     True
2    False
3     True
Name: Quantity, dtype: bool

In [23]:
# Boolean-mask selection: keep only the rows whose Quantity is below 20.
df.loc[df['Quantity'] < 20]

Unnamed: 0,Item,Quantity,Price
0,Apple,12,1.0
1,Orange,10,2.0
3,Watermelon,3,9.0


In [24]:
# Store the mask under a name so it can be reused for filtering.
result = df['Price'].lt(5)
result

0     True
1     True
2     True
3    False
Name: Price, dtype: bool

In [25]:
# Apply the stored boolean mask: only rows where `result` is True are kept.
df.loc[result]

Unnamed: 0,Item,Quantity,Price
0,Apple,12,1.0
1,Orange,10,2.0
2,Baana,100,0.5


In [26]:
# Add a derived column: row-wise product of Price and Quantity.
df['Total'] = df['Price'].mul(df['Quantity'])
df

Unnamed: 0,Item,Quantity,Price,Total
0,Apple,12,1.0,12.0
1,Orange,10,2.0,20.0
2,Baana,100,0.5,50.0
3,Watermelon,3,9.0,27.0


In [27]:
# Keep the Total column as its own Series for the summary statistics below.
total = df['Total']
total

0    12.0
1    20.0
2    50.0
3    27.0
Name: Total, dtype: float64

In [28]:
# Smallest value in the Total column.
total.min()

12.0

In [29]:
# Largest value in the Total column.
total.max()

50.0

In [30]:
# Arithmetic mean of the Total column.
total.mean()

27.25

In [31]:
# Sample variance: pandas divides by n - 1 by default (ddof=1).
total.var()

267.5833333333333

In [33]:
# Sample standard deviation (ddof=1 by default), i.e. the square root of total.var().
total.std()

16.357974609753292

In [34]:
# Median; with four values this is the midpoint of the two middle ones.
total.median()

23.5

In [36]:
# First quartile; pandas interpolates linearly between neighbouring values by default.
total.quantile(0.25)

18.0

In [38]:
# Passing a list of quantiles returns a Series indexed by the requested fractions.
total.quantile([0.25,0.5,0.75])

0.25    18.00
0.50    23.50
0.75    32.75
Name: Total, dtype: float64

In [39]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns;
# the text column 'Item' is left out by default.
df.describe()

Unnamed: 0,Quantity,Price,Total
count,4.0,4.0,4.0
mean,31.25,3.125,27.25
std,45.995471,3.966001,16.357975
min,3.0,0.5,12.0
25%,8.25,0.875,18.0
50%,11.0,1.5,23.5
75%,34.0,3.75,32.75
max,100.0,9.0,50.0


In [41]:
# Load the California housing test CSV from a path relative to the working directory.
# NOTE(review): this rebinds `df`, so the four-row item table built earlier is no
# longer reachable under that name after this cell runs.
path = 'Data/california_housing_test.csv'
df = pd.read_csv(path)
df

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value
0,-122.05,37.37,27.0,3885.0,661.0,1537.0,606.0,6.6085,344700.0
1,-118.30,34.26,43.0,1510.0,310.0,809.0,277.0,3.5990,176500.0
2,-117.81,33.78,27.0,3589.0,507.0,1484.0,495.0,5.7934,270500.0
3,-118.36,33.82,28.0,67.0,15.0,49.0,11.0,6.1359,330000.0
4,-119.67,36.33,19.0,1241.0,244.0,850.0,237.0,2.9375,81700.0
...,...,...,...,...,...,...,...,...,...
2995,-119.86,34.42,23.0,1450.0,642.0,1258.0,607.0,1.1790,225000.0
2996,-118.14,34.06,27.0,5257.0,1082.0,3496.0,1036.0,3.3906,237200.0
2997,-119.70,36.30,10.0,956.0,201.0,693.0,220.0,2.2895,62000.0
2998,-117.12,34.10,40.0,96.0,14.0,46.0,14.0,3.2708,162500.0


In [42]:
# Preview the first five rows (the default for head()).
df.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value
0,-122.05,37.37,27.0,3885.0,661.0,1537.0,606.0,6.6085,344700.0
1,-118.3,34.26,43.0,1510.0,310.0,809.0,277.0,3.599,176500.0
2,-117.81,33.78,27.0,3589.0,507.0,1484.0,495.0,5.7934,270500.0
3,-118.36,33.82,28.0,67.0,15.0,49.0,11.0,6.1359,330000.0
4,-119.67,36.33,19.0,1241.0,244.0,850.0,237.0,2.9375,81700.0


In [43]:
# Preview the first ten rows.
df.head(10)

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value
0,-122.05,37.37,27.0,3885.0,661.0,1537.0,606.0,6.6085,344700.0
1,-118.3,34.26,43.0,1510.0,310.0,809.0,277.0,3.599,176500.0
2,-117.81,33.78,27.0,3589.0,507.0,1484.0,495.0,5.7934,270500.0
3,-118.36,33.82,28.0,67.0,15.0,49.0,11.0,6.1359,330000.0
4,-119.67,36.33,19.0,1241.0,244.0,850.0,237.0,2.9375,81700.0
5,-119.56,36.51,37.0,1018.0,213.0,663.0,204.0,1.6635,67000.0
6,-121.43,38.63,43.0,1009.0,225.0,604.0,218.0,1.6641,67000.0
7,-120.65,35.48,19.0,2310.0,471.0,1341.0,441.0,3.225,166900.0
8,-122.84,38.4,15.0,3080.0,617.0,1446.0,599.0,3.6696,194400.0
9,-118.02,34.08,31.0,2402.0,632.0,2830.0,603.0,2.3333,164200.0


In [44]:
# Summary statistics for every column of the housing data (all columns are numeric).
df.describe()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value
count,3000.0,3000.0,3000.0,3000.0,3000.0,3000.0,3000.0,3000.0,3000.0
mean,-119.5892,35.63539,28.845333,2599.578667,529.950667,1402.798667,489.912,3.807272,205846.275
std,1.994936,2.12967,12.555396,2155.593332,415.654368,1030.543012,365.42271,1.854512,113119.68747
min,-124.18,32.56,1.0,6.0,2.0,5.0,2.0,0.4999,22500.0
25%,-121.81,33.93,18.0,1401.0,291.0,780.0,273.0,2.544,121200.0
50%,-118.485,34.27,29.0,2106.0,437.0,1155.0,409.5,3.48715,177650.0
75%,-118.02,37.69,37.0,3129.0,636.0,1742.75,597.25,4.656475,263975.0
max,-114.49,41.92,52.0,30450.0,5419.0,11935.0,4930.0,15.0001,500001.0
