# How to manipulate, filter and sort an Excel file

## Libraries

In [1]:
import pandas as pd
from openpyxl.workbook import workbook

[Dataset: Meteorite Landings](https://www.kaggle.com/datasets/nasa/meteorite-landings)

In [13]:
# Load the meteorite landings CSV; the relative path is resolved against
# the directory the notebook kernel is running in.
df = pd.read_csv(filepath_or_buffer='files/Meteorite_Landings.csv')

# Preview the first five rows to check that the file was parsed as expected.
df.head(n=5)

Unnamed: 0,name,id,nametype,recclass,mass (g),fall,year,reclat,reclong,GeoLocation
0,Aachen,1,Valid,L5,21.0,Fell,1880.0,50.775,6.08333,"(50.775, 6.08333)"
1,Aarhus,2,Valid,H6,720.0,Fell,1951.0,56.18333,10.23333,"(56.18333, 10.23333)"
2,Abee,6,Valid,EH4,107000.0,Fell,1952.0,54.21667,-113.0,"(54.21667, -113.0)"
3,Acapulco,10,Valid,Acapulcoite,1914.0,Fell,1976.0,16.88333,-99.9,"(16.88333, -99.9)"
4,Achiras,370,Valid,L6,780.0,Fell,1902.0,-33.16667,-64.95,"(-33.16667, -64.95)"


## How to get rows by filtering on a specific value
* Get the data for the meteorites that landed in the year 1976

In [24]:
# Build a boolean mask (True where 'year' equals 1976) and keep only those rows.
df.loc[df['year'].eq(1976.0)]

Unnamed: 0,name,id,nametype,recclass,mass (g),fall,year,reclat,reclong,GeoLocation
3,Acapulco,10,Valid,Acapulcoite,1914.0,Fell,1976.0,16.88333,-99.9,"(16.88333, -99.9)"
238,Dhajala,6698,Valid,H3.8,45000.0,Fell,1976.0,22.37778,71.42722,"(22.37778, 71.42722)"
256,Dowa,7725,Valid,Stone-uncl,642.0,Fell,1976.0,-13.66667,33.91667,"(-13.66667, 33.91667)"
262,Dunhua,7749,Valid,Stone-uncl,,Fell,1976.0,43.33333,128.25,"(43.33333, 128.25)"
337,Grefsheim,11196,Valid,L5,45.5,Fell,1976.0,60.66667,11.0,"(60.66667, 11.0)"
419,Jilin,12171,Valid,H5,4000000.0,Fell,1976.0,44.05,126.16667,"(44.05, 126.16667)"
428,Junan,12210,Valid,L6,950.0,Fell,1976.0,35.2,118.8,"(35.2, 118.8)"
821,Qingzhen,18908,Valid,EH3,2600.0,Fell,1976.0,26.53333,106.46667,"(26.53333, 106.46667)"
863,Ruhobobo,22780,Valid,L6,465.5,Fell,1976.0,-1.45,29.83333,"(-1.45, 29.83333)"
914,Sheyang,23531,Valid,L6,605.0,Fell,1976.0,33.65,120.06667,"(33.65, 120.06667)"


* Now get the data for the meteorite that landed in the year 1976 and is named *Acapulco*

In [25]:
# One boolean mask per condition; `&` combines them element-wise so a row is
# kept only when both conditions hold.
is_1976 = df['year'] == 1976.0
is_acapulco = df['name'] == 'Acapulco'
df.loc[is_1976 & is_acapulco]

Unnamed: 0,name,id,nametype,recclass,mass (g),fall,year,reclat,reclong,GeoLocation
3,Acapulco,10,Valid,Acapulcoite,1914.0,Fell,1976.0,16.88333,-99.9,"(16.88333, -99.9)"


## How to apply a *lambda* function
* Create a new column to store the mass of each meteorite in kilograms

In [29]:
# Apply the lambda to every value of 'mass (g)': grams / 1000 gives kilograms.
# The result is stored in a new 'mass(kg)' column.
df['mass(kg)'] = df['mass (g)'].apply(lambda grams: grams / 1000)

In [30]:
# Show the first four rows to confirm the new 'mass(kg)' column was added.
df.head(n=4)

Unnamed: 0,name,id,nametype,recclass,mass (g),fall,year,reclat,reclong,GeoLocation,mass(kg)
0,Aachen,1,Valid,L5,21.0,Fell,1880.0,50.775,6.08333,"(50.775, 6.08333)",0.021
1,Aarhus,2,Valid,H6,720.0,Fell,1951.0,56.18333,10.23333,"(56.18333, 10.23333)",0.72
2,Abee,6,Valid,EH4,107000.0,Fell,1952.0,54.21667,-113.0,"(54.21667, -113.0)",107.0
3,Acapulco,10,Valid,Acapulcoite,1914.0,Fell,1976.0,16.88333,-99.9,"(16.88333, -99.9)",1.914


## How to drop columns
* Drop the columns *fall* and *GeoLocation* from the dataframe

In [32]:
# Names of the columns to remove from the dataframe.
to_drop = [
    'fall',
    'GeoLocation',
]

In [33]:
# Drop the columns listed in `to_drop` and rebind `df` to the returned frame
# instead of mutating in place (`inplace=True` is discouraged in pandas).
# errors="ignore" skips columns that are already gone, so re-running this
# cell does not raise a KeyError.
df = df.drop(columns=to_drop, errors="ignore")

In [35]:
# Show the first five rows to confirm the columns were removed.
df.head(n=5)

Unnamed: 0,name,id,nametype,recclass,mass (g),year,reclat,reclong,mass(kg)
0,Aachen,1,Valid,L5,21.0,1880.0,50.775,6.08333,0.021
1,Aarhus,2,Valid,H6,720.0,1951.0,56.18333,10.23333,0.72
2,Abee,6,Valid,EH4,107000.0,1952.0,54.21667,-113.0,107.0
3,Acapulco,10,Valid,Acapulcoite,1914.0,1976.0,16.88333,-99.9,1.914
4,Achiras,370,Valid,L6,780.0,1902.0,-33.16667,-64.95,0.78
