### Pandas

Pandas is a fast, powerful, flexible and easy to use open source data analysis and manipulation tool, built on top of the python programming language.
It provides highly optimized performance with back-end and source code is purely written in C or Python.

1D series: 1D labeled homogenously-typed array

2D DataFrame: general 2D labeled size-mutable tabular structure with potentially heterogenously-typed column

In [2]:
import pandas as pd
import numpy as np

In [3]:
l=list('ABCDE')
l

['A', 'B', 'C', 'D', 'E']

In [5]:
s=pd.Series(l)
s
#only the list items here are data and the output is 1-dimensional.
#the first column is index values
#also string data type is not there in pandas, it is called object.

0    A
1    B
2    C
3    D
4    E
dtype: object

In [46]:
s1=pd.Series(l, index=[34,2,'A',34,12])
s1
#multiple data values can have the same index values
#no. of index values should be same as no.of data values

34    A
2     B
A     C
34    D
12    E
dtype: object

In [8]:
s1[34]

34    A
34    D
dtype: object

In [49]:
s1[12]

'E'

In [47]:
s1.34

SyntaxError: invalid syntax (436401596.py, line 1)

In [48]:
s1.A  #if index is in string format this is also one way to access them

'C'

In [51]:
s1['A']

'C'

In [10]:
l2= [list('ABCD'),[23,90,5,12]]
l2

[['A', 'B', 'C', 'D'], [23, 90, 5, 12]]

In [11]:
df=pd.DataFrame(l2)
df
#here columns 0-3 have object data type for both the string and numeral rows

Unnamed: 0,0,1,2,3
0,A,B,C,D
1,23,90,5,12


In [12]:
l3 = [['A',23],
      ['B',90],
      ['C',5],
      ['D',12]]
l3

[['A', 23], ['B', 90], ['C', 5], ['D', 12]]

In [16]:
df3= pd.DataFrame(l3)
df3
#here column 0 has object datatype whereas column 1 has integer datatype

Unnamed: 0,0,1
0,A,23
1,B,90
2,C,5
3,D,12


In [21]:
df3.dtypes

0    object
1     int64
dtype: object

In [24]:
pd.DataFrame(l3, columns=['Name', 'Rank'], index=['#1', '#2', '#3', '#4'])

Unnamed: 0,Name,Rank
#1,A,23
#2,B,90
#3,C,5
#4,D,12


In [25]:
d={'Name': list('ABCD'), 'Rank': [23,90,5,12]}
d

{'Name': ['A', 'B', 'C', 'D'], 'Rank': [23, 90, 5, 12]}

In [26]:
dfd = pd.DataFrame(d)  #keys of dictionary becomes column names
dfd

Unnamed: 0,Name,Rank
0,A,23
1,B,90
2,C,5
3,D,12


In [27]:
d1={'Name': list('ABCD'), 'Rank': [23,90,5]}
dfd1 = pd.DataFrame(d1)  
dfd1
#ValueError: All arrays must be of the same length
#use np.NaN to print NaN there or print 'None' or anything to make array 
#lengths equal.

ValueError: All arrays must be of the same length

In [28]:
#but same thing in list
l3 = [['A',23],
      ['B',90],
      ['C'],
      ['D',12]]
df3= pd.DataFrame(l3)
df3
#prints NaN there instead of throwing error
#also NaN:Not a Number is a floating point number therefore all elements in
#column are converted to float values

Unnamed: 0,0,1
0,A,23.0
1,B,90.0
2,C,
3,D,12.0


In [30]:
d1=[{'Name': 'A', 'Rank': 23},
    {'Name': 'B', 'Rank': 90},
    {'Name': 'C', 'Rank': 5, 'City': 'Delhi'},
    {'Name': 'D', 'Rank': 12}]
pd.DataFrame(d1)
#hence dictionary in a list works even for not given values

Unnamed: 0,Name,Rank,City
0,A,23,
1,B,90,
2,C,5,Delhi
3,D,12,


In [None]:
#csv files-> COMMA SEPARATED VALUES
#advantage of these are that we can open them in notepad excel etc. 
#The changes made in excel such as cell size, font size, font colour, etc 
#do not show in other apps we open csv files therefore file sizes are 
#lighter than in excel format. Heavier the size, more processing it will
#take. Also, csv files can be opened in many places like google browser 
#on your phones etc.

In [31]:
nba = pd.read_csv('nba.csv')
nba  #shows only ten rows; first 5 and last 5 and total number of rows 
#and columns

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0
...,...,...,...,...,...,...,...,...,...
452,Trey Lyles,Utah Jazz,41.0,PF,20.0,6-10,234.0,Kentucky,2239800.0
453,Shelvin Mack,Utah Jazz,8.0,PG,26.0,6-3,203.0,Butler,2433333.0
454,Raul Neto,Utah Jazz,25.0,PG,24.0,6-1,179.0,,900000.0
455,Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0


In [33]:
pd.set_option('display.max_rows', None) #we can see the entire data now.

In [34]:
nba = pd.read_csv('nba.csv')
nba

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0
5,Amir Johnson,Boston Celtics,90.0,PF,29.0,6-9,240.0,,12000000.0
6,Jordan Mickey,Boston Celtics,55.0,PF,21.0,6-8,235.0,LSU,1170960.0
7,Kelly Olynyk,Boston Celtics,41.0,C,25.0,7-0,238.0,Gonzaga,2165160.0
8,Terry Rozier,Boston Celtics,12.0,PG,22.0,6-2,190.0,Louisville,1824360.0
9,Marcus Smart,Boston Celtics,36.0,PG,22.0,6-4,220.0,Oklahoma State,3431040.0


In [35]:
pd.set_option('display.max_rows', 10)

In [36]:
nba

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0
...,...,...,...,...,...,...,...,...,...
452,Trey Lyles,Utah Jazz,41.0,PF,20.0,6-10,234.0,Kentucky,2239800.0
453,Shelvin Mack,Utah Jazz,8.0,PG,26.0,6-3,203.0,Butler,2433333.0
454,Raul Neto,Utah Jazz,25.0,PG,24.0,6-1,179.0,,900000.0
455,Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0


In [38]:
n=pd.read_csv('nba.csv', index_col = 'Name')
n
#name column is not counted in data anymore. it is saved as index column
#therefore it shows 8 columns now instead of 9.

Unnamed: 0_level_0,Team,Number,Position,Age,Height,Weight,College,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0
...,...,...,...,...,...,...,...,...
Trey Lyles,Utah Jazz,41.0,PF,20.0,6-10,234.0,Kentucky,2239800.0
Shelvin Mack,Utah Jazz,8.0,PG,26.0,6-3,203.0,Butler,2433333.0
Raul Neto,Utah Jazz,25.0,PG,24.0,6-1,179.0,,900000.0
Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0


In [40]:
nbaExcel = pd.read_excel('nba.xlsx')
nbaExcel

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0,PG,25,2024-02-06 00:00:00,180,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99,SF,25,2024-06-06 00:00:00,235,Marquette,6796117.0
2,John Holland,Boston Celtics,30,SG,27,2024-05-06 00:00:00,205,Boston University,
3,R.J. Hunter,Boston Celtics,28,SG,22,2024-05-06 00:00:00,185,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,8,PF,29,2024-10-06 00:00:00,231,,5000000.0
...,...,...,...,...,...,...,...,...,...
452,Trey Lyles,Utah Jazz,41,PF,20,2024-10-06 00:00:00,234,Kentucky,2239800.0
453,Shelvin Mack,Utah Jazz,8,PG,26,2024-03-06 00:00:00,203,Butler,2433333.0
454,Raul Neto,Utah Jazz,25,PG,24,2024-01-06 00:00:00,179,,900000.0
455,Tibor Pleiss,Utah Jazz,21,C,26,2024-03-07 00:00:00,256,,2900000.0


In [41]:
pd.read_csv?

In [None]:
#pd.read_sql('nba.db')   to read sql file 

In [42]:
airfile = pd.read_csv('https://raw.githubusercontent.com/datasets/airport-codes/master/data/airport-codes.csv')
airfile

Unnamed: 0,ident,type,name,elevation_ft,continent,iso_country,iso_region,municipality,gps_code,iata_code,local_code,coordinates
0,00A,heliport,Total Rf Heliport,11.0,,US,US-PA,Bensalem,00A,,00A,"40.07080078125, -74.93360137939453"
1,00AA,small_airport,Aero B Ranch Airport,3435.0,,US,US-KS,Leoti,00AA,,00AA,"38.704022, -101.473911"
2,00AK,small_airport,Lowell Field,450.0,,US,US-AK,Anchor Point,00AK,,00AK,"59.947733, -151.692524"
3,00AL,small_airport,Epps Airpark,820.0,,US,US-AL,Harvest,00AL,,00AL,"34.86479949951172, -86.77030181884766"
4,00AN,small_airport,Katmai Lodge Airport,80.0,,US,US-AK,King Salmon,00AN,,00AN,"59.093287, -156.456699"
...,...,...,...,...,...,...,...,...,...,...,...,...
75843,ZZ-0001,heliport,Sealand Helipad,40.0,EU,GB,GB-ENG,Sealand,,,,"51.894444, 1.4825"
75844,ZZ-0002,small_airport,Glorioso Islands Airstrip,11.0,AF,TF,TF-U-A,Grande Glorieuse,,,,"-11.584277777799999, 47.296388888900005"
75845,ZZ-0003,small_airport,Fainting Goat Airport,690.0,,US,US-TX,Blum,87TX,,87TX,"32.110587, -97.356312"
75846,ZZZW,closed,Scandium City Heliport,4.0,,CA,CA-YT,(Old) Scandium City,,,,"69.355287, -138.93931"


In [43]:
airfile.to_csv('Airports.csv')  #converting python data to csv format
#if i don't give a path or write the name of the folder where i want to
#store the file, it automatically gets saved to where the jupyter notebook 
#is opened. if i give a path, the file will be stored there.

#ALSO, if i make changes to the data here and then run this code again to 
#save the file  with the same name, then python will overwrite the new data
#over the old file.

In [44]:
airfile.to_csv?

### Indexing in DataFrame

In [45]:
nba

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0
...,...,...,...,...,...,...,...,...,...
452,Trey Lyles,Utah Jazz,41.0,PF,20.0,6-10,234.0,Kentucky,2239800.0
453,Shelvin Mack,Utah Jazz,8.0,PG,26.0,6-3,203.0,Butler,2433333.0
454,Raul Neto,Utah Jazz,25.0,PG,24.0,6-1,179.0,,900000.0
455,Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0


In [52]:
#Column Indexing
#Single Column Indexing

In [137]:
nba['Position']

0      PG
1      SF
2      SG
3      SG
4      PF
       ..
452    PF
453    PG
454    PG
455     C
456     C
Name: Position, Length: 457, dtype: object

In [57]:
nba.Position

0      PG
1      SF
2      SG
3      SG
4      PF
       ..
452    PF
453    PG
454    PG
455     C
456     C
Name: Position, Length: 457, dtype: object

In [55]:
#Multi Column Indexing

In [60]:
nb_psc= nba[['Position', 'Salary', 'College']]
nb_psc

Unnamed: 0,Position,Salary,College
0,PG,7730337.0,Texas
1,SF,6796117.0,Marquette
2,SG,,Boston University
3,SG,1148640.0,Georgia State
4,PF,5000000.0,
...,...,...,...
452,PF,2239800.0,Kentucky
453,PG,2433333.0,Butler
454,PG,900000.0,
455,C,2900000.0,


In [138]:
nb_pos= nba[['Position']]
nb_pos

Unnamed: 0,Position
0,PG
1,SF
2,SG
3,SG
4,PF
...,...
452,PF
453,PG
454,PG
455,C


In [139]:
type(nb_pos)
#each and every column of a dataframe is individually a series!!

pandas.core.frame.DataFrame

In [63]:
nba

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0
...,...,...,...,...,...,...,...,...,...
452,Trey Lyles,Utah Jazz,41.0,PF,20.0,6-10,234.0,Kentucky,2239800.0
453,Shelvin Mack,Utah Jazz,8.0,PG,26.0,6-3,203.0,Butler,2433333.0
454,Raul Neto,Utah Jazz,25.0,PG,24.0,6-1,179.0,,900000.0
455,Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0


In [62]:
#Row Indexing
#Single Row Indexing

In [65]:
nba.loc[2]
#each and every row of a dataframe is individually a series!!

Name             John Holland
Team           Boston Celtics
Number                   30.0
Position                   SG
Age                      27.0
Height                    6-5
Weight                  205.0
College     Boston University
Salary                    NaN
Name: 2, dtype: object

In [66]:
#Multi Row Indexing

In [64]:
nba.loc[[2]]

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,


In [68]:
nba.loc[[2, 52,114,235,101,351,453]]

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
52,Carl Landry,Philadelphia 76ers,7.0,PF,32.0,6-9,248.0,Purdue,6500000.0
114,Larry Nance Jr.,Los Angeles Lakers,7.0,PF,23.0,6-9,230.0,Wyoming,1155600.0
235,Salah Mejri,Dallas Mavericks,50.0,C,29.0,7-2,245.0,,525093.0
101,Paul Pierce,Los Angeles Clippers,34.0,SF,38.0,6-7,235.0,Kansas,3376000.0
351,Hassan Whiteside,Miami Heat,21.0,C,26.0,7-0,265.0,Marshall,981348.0
453,Shelvin Mack,Utah Jazz,8.0,PG,26.0,6-3,203.0,Butler,2433333.0


In [69]:
n

Unnamed: 0_level_0,Team,Number,Position,Age,Height,Weight,College,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0
...,...,...,...,...,...,...,...,...
Trey Lyles,Utah Jazz,41.0,PF,20.0,6-10,234.0,Kentucky,2239800.0
Shelvin Mack,Utah Jazz,8.0,PG,26.0,6-3,203.0,Butler,2433333.0
Raul Neto,Utah Jazz,25.0,PG,24.0,6-1,179.0,,900000.0
Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0


In [70]:
nba.loc[[2]]

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,


In [71]:
n.loc[2]

KeyError: 2

In [74]:
n.loc[['John Holland']]

Unnamed: 0_level_0,Team,Number,Position,Age,Height,Weight,College,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,


another function called iloc can be used for such datframes where one of the columns are the index columns, hence making data difficult to access in a continuous/loop fashion.
pyhon always assigns/maintains a zero based index in the backend even if not present in the dataframe. iloc stands or integer based location and it accesses the integer index assigned in the backend. this is the same way it works for strings, tuples, lists etc. we can't see the indexing but it is present in the backend.

In [75]:
n.iloc[[2]]

Unnamed: 0_level_0,Team,Number,Position,Age,Height,Weight,College,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,


In [76]:
nba.iloc[[2]]

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,


In [90]:
nc=nba.copy()   
#it is never advisable to change the base data. hitting the server again 
#and again to request data creates an unnecessary load on it
nc

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0
...,...,...,...,...,...,...,...,...,...
452,Trey Lyles,Utah Jazz,41.0,PF,20.0,6-10,234.0,Kentucky,2239800.0
453,Shelvin Mack,Utah Jazz,8.0,PG,26.0,6-3,203.0,Butler,2433333.0
454,Raul Neto,Utah Jazz,25.0,PG,24.0,6-1,179.0,,900000.0
455,Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0


#nc.set_index?
nc.set_index(keys,
             *,
    drop: 'bool' = True,
    append: 'bool' = False,
    inplace: 'bool' = False,
    verify_integrity: 'bool' = False,
) -> 'DataFrame | None'

In [183]:
nb_num= nc.set_index('Number', drop=False)
nb_num

Unnamed: 0_level_0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0.0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
99.0,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
30.0,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
28.0,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
8.0,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0
...,...,...,...,...,...,...,...,...,...
41.0,Trey Lyles,Utah Jazz,41.0,PF,20.0,6-10,234.0,Kentucky,2239800.0
8.0,Shelvin Mack,Utah Jazz,8.0,PG,26.0,6-3,203.0,Butler,2433333.0
25.0,Raul Neto,Utah Jazz,25.0,PG,24.0,6-1,179.0,,900000.0
21.0,Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0


In [186]:
nb_num2= nc.set_index('Number', drop=True)  
nb_num2
#if drop is true the column set as index is removed from data frame

Unnamed: 0_level_0,Name,Team,Position,Age,Height,Weight,College,Salary
Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0.0,Avery Bradley,Boston Celtics,PG,25.0,6-2,180.0,Texas,7730337.0
99.0,Jae Crowder,Boston Celtics,SF,25.0,6-6,235.0,Marquette,6796117.0
30.0,John Holland,Boston Celtics,SG,27.0,6-5,205.0,Boston University,
28.0,R.J. Hunter,Boston Celtics,SG,22.0,6-5,185.0,Georgia State,1148640.0
8.0,Jonas Jerebko,Boston Celtics,PF,29.0,6-10,231.0,,5000000.0
...,...,...,...,...,...,...,...,...
41.0,Trey Lyles,Utah Jazz,PF,20.0,6-10,234.0,Kentucky,2239800.0
8.0,Shelvin Mack,Utah Jazz,PG,26.0,6-3,203.0,Butler,2433333.0
25.0,Raul Neto,Utah Jazz,PG,24.0,6-1,179.0,,900000.0
21.0,Tibor Pleiss,Utah Jazz,C,26.0,7-3,256.0,,2900000.0


In [92]:
nc #still remains the same because of inplace

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0
...,...,...,...,...,...,...,...,...,...
452,Trey Lyles,Utah Jazz,41.0,PF,20.0,6-10,234.0,Kentucky,2239800.0
453,Shelvin Mack,Utah Jazz,8.0,PG,26.0,6-3,203.0,Butler,2433333.0
454,Raul Neto,Utah Jazz,25.0,PG,24.0,6-1,179.0,,900000.0
455,Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0


In [None]:
#if we add inplace = True then it means we want to do the modifications to 
#the same dataframe. by default, inplace is set to False.

In [94]:
nct=nc.set_index('Number', drop=False)
nct

Unnamed: 0_level_0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0.0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
99.0,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
30.0,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
28.0,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
8.0,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0
...,...,...,...,...,...,...,...,...,...
41.0,Trey Lyles,Utah Jazz,41.0,PF,20.0,6-10,234.0,Kentucky,2239800.0
8.0,Shelvin Mack,Utah Jazz,8.0,PG,26.0,6-3,203.0,Butler,2433333.0
25.0,Raul Neto,Utah Jazz,25.0,PG,24.0,6-1,179.0,,900000.0
21.0,Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0


In [95]:
nc.set_index('Number', drop=False, inplace= True)   #inplace argument

In [96]:
nc #nc has changed now

Unnamed: 0_level_0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0.0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
99.0,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
30.0,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
28.0,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
8.0,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0
...,...,...,...,...,...,...,...,...,...
41.0,Trey Lyles,Utah Jazz,41.0,PF,20.0,6-10,234.0,Kentucky,2239800.0
8.0,Shelvin Mack,Utah Jazz,8.0,PG,26.0,6-3,203.0,Butler,2433333.0
25.0,Raul Neto,Utah Jazz,25.0,PG,24.0,6-1,179.0,,900000.0
21.0,Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0


In [87]:
## Slicing

In [97]:
nba

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0
...,...,...,...,...,...,...,...,...,...
452,Trey Lyles,Utah Jazz,41.0,PF,20.0,6-10,234.0,Kentucky,2239800.0
453,Shelvin Mack,Utah Jazz,8.0,PG,26.0,6-3,203.0,Butler,2433333.0
454,Raul Neto,Utah Jazz,25.0,PG,24.0,6-1,179.0,,900000.0
455,Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0


In [98]:
nba.loc[34:39]

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
34,Jose Calderon,New York Knicks,3.0,PG,34.0,6-3,200.0,,7402812.0
35,Cleanthony Early,New York Knicks,11.0,SF,25.0,6-8,210.0,Wichita State,845059.0
36,Langston Galloway,New York Knicks,2.0,SG,24.0,6-2,200.0,Saint Joseph's,845059.0
37,Jerian Grant,New York Knicks,13.0,PG,23.0,6-4,195.0,Notre Dame,1572360.0
38,Robin Lopez,New York Knicks,8.0,C,28.0,7-0,255.0,Stanford,12650000.0
39,Kyle O'Quinn,New York Knicks,9.0,PF,26.0,6-10,250.0,Norfolk State,3750000.0


In [99]:
nba[34:39]  #in this method, end index is not inclusive

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
34,Jose Calderon,New York Knicks,3.0,PG,34.0,6-3,200.0,,7402812.0
35,Cleanthony Early,New York Knicks,11.0,SF,25.0,6-8,210.0,Wichita State,845059.0
36,Langston Galloway,New York Knicks,2.0,SG,24.0,6-2,200.0,Saint Joseph's,845059.0
37,Jerian Grant,New York Knicks,13.0,PG,23.0,6-4,195.0,Notre Dame,1572360.0
38,Robin Lopez,New York Knicks,8.0,C,28.0,7-0,255.0,Stanford,12650000.0


In [100]:
nba.loc[32:40:2]

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
32,Thanasis Antetokounmpo,New York Knicks,43.0,SF,23.0,6-7,205.0,,30888.0
34,Jose Calderon,New York Knicks,3.0,PG,34.0,6-3,200.0,,7402812.0
36,Langston Galloway,New York Knicks,2.0,SG,24.0,6-2,200.0,Saint Joseph's,845059.0
38,Robin Lopez,New York Knicks,8.0,C,28.0,7-0,255.0,Stanford,12650000.0
40,Kristaps Porzingis,New York Knicks,6.0,PF,20.0,7-3,240.0,,4131720.0


In [101]:
nba[32:40:2]  #steps work similarly

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
32,Thanasis Antetokounmpo,New York Knicks,43.0,SF,23.0,6-7,205.0,,30888.0
34,Jose Calderon,New York Knicks,3.0,PG,34.0,6-3,200.0,,7402812.0
36,Langston Galloway,New York Knicks,2.0,SG,24.0,6-2,200.0,Saint Joseph's,845059.0
38,Robin Lopez,New York Knicks,8.0,C,28.0,7-0,255.0,Stanford,12650000.0


In [105]:
nba.loc[:4]  #slicing similarly works for iloc as well

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0


In [106]:
#there is also a function to get the above output, which gives the first 
#five rows of the dataset
nba.head()

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0


In [107]:
#you can also give end index till the number of rows you want
nba.head(10)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0
5,Amir Johnson,Boston Celtics,90.0,PF,29.0,6-9,240.0,,12000000.0
6,Jordan Mickey,Boston Celtics,55.0,PF,21.0,6-8,235.0,LSU,1170960.0
7,Kelly Olynyk,Boston Celtics,41.0,C,25.0,7-0,238.0,Gonzaga,2165160.0
8,Terry Rozier,Boston Celtics,12.0,PG,22.0,6-2,190.0,Louisville,1824360.0
9,Marcus Smart,Boston Celtics,36.0,PG,22.0,6-4,220.0,Oklahoma State,3431040.0


In [108]:
nba.tail() #works similary just from the end of the dataset

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
452,Trey Lyles,Utah Jazz,41.0,PF,20.0,6-10,234.0,Kentucky,2239800.0
453,Shelvin Mack,Utah Jazz,8.0,PG,26.0,6-3,203.0,Butler,2433333.0
454,Raul Neto,Utah Jazz,25.0,PG,24.0,6-1,179.0,,900000.0
455,Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0
456,Jeff Withey,Utah Jazz,24.0,C,26.0,7-0,231.0,Kansas,947276.0


In [109]:
nba.tail(10)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
447,Rudy Gobert,Utah Jazz,27.0,C,23.0,7-1,245.0,,1175880.0
448,Gordon Hayward,Utah Jazz,20.0,SF,26.0,6-8,226.0,Butler,15409570.0
449,Rodney Hood,Utah Jazz,5.0,SG,23.0,6-8,206.0,Duke,1348440.0
450,Joe Ingles,Utah Jazz,2.0,SF,28.0,6-8,226.0,,2050000.0
451,Chris Johnson,Utah Jazz,23.0,SF,26.0,6-6,206.0,Dayton,981348.0
452,Trey Lyles,Utah Jazz,41.0,PF,20.0,6-10,234.0,Kentucky,2239800.0
453,Shelvin Mack,Utah Jazz,8.0,PG,26.0,6-3,203.0,Butler,2433333.0
454,Raul Neto,Utah Jazz,25.0,PG,24.0,6-1,179.0,,900000.0
455,Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0
456,Jeff Withey,Utah Jazz,24.0,C,26.0,7-0,231.0,Kansas,947276.0


In [116]:
nba.sample()  #gives a random data sample from the data record.

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
164,Derrick Rose,Chicago Bulls,1.0,PG,27.0,6-3,190.0,Memphis,20093064.0


In [121]:
nba.sample(4)  
#gives you the required number(your input) of samples randomly everytime

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
308,David West,San Antonio Spurs,30.0,PF,35.0,6-9,250.0,Xavier,1499187.0
271,Zach Randolph,Memphis Grizzlies,50.0,PF,34.0,6-9,260.0,Michigan State,9638555.0
233,Wesley Matthews,Dallas Mavericks,23.0,SG,29.0,6-5,220.0,Marquette,16407500.0
341,Goran Dragic,Miami Heat,7.0,PG,30.0,6-3,190.0,,14783000.0


In [122]:
nba.index

RangeIndex(start=0, stop=457, step=1)

In [123]:
e= pd.DataFrame(l3, columns=['Name', 'Rank'])

In [124]:
e

Unnamed: 0,Name,Rank
0,A,23.0
1,B,90.0
2,C,
3,D,12.0


In [125]:
e.index

RangeIndex(start=0, stop=4, step=1)

In [126]:
e1= pd.DataFrame(l3, columns=['Name', 'Rank'], index=['#1', '#2', '#3', '#4'])
e1

Unnamed: 0,Name,Rank
#1,A,23.0
#2,B,90.0
#3,C,
#4,D,12.0


In [127]:
e1.index

Index(['#1', '#2', '#3', '#4'], dtype='object')

In [128]:
e1= pd.DataFrame(l3, columns=['Name', 'Rank'], index=[4, 22, 8, 0])
e1

Unnamed: 0,Name,Rank
4,A,23.0
22,B,90.0
8,C,
0,D,12.0


In [129]:
e1.index

Index([4, 22, 8, 0], dtype='int64')

In [130]:
e1= pd.DataFrame(l3, columns=['Name', 'Rank'], index=[22, '#2', '#3', 3.5])
e1

Unnamed: 0,Name,Rank
22,A,23.0
#2,B,90.0
#3,C,
3.5,D,12.0


In [131]:
e1.index

Index([22, '#2', '#3', 3.5], dtype='object')

In [132]:
list(e1.index)

[22, '#2', '#3', 3.5]

In [133]:
list(nba.columns)

['Name',
 'Team',
 'Number',
 'Position',
 'Age',
 'Height',
 'Weight',
 'College',
 'Salary']

In [134]:
nba.columns

Index(['Name', 'Team', 'Number', 'Position', 'Age', 'Height', 'Weight',
       'College', 'Salary'],
      dtype='object')

In [135]:
e1.columns

Index(['Name', 'Rank'], dtype='object')

In [140]:
nb_pos

Unnamed: 0,Position
0,PG
1,SF
2,SG
3,SG
4,PF
...,...
452,PF
453,PG
454,PG
455,C


In [141]:
nb_pos.loc[[4]]

Unnamed: 0,Position
4,PF


In [142]:
nb_pos.loc[4]

Position    PF
Name: 4, dtype: object

In [148]:
nb_pos[2:5]

Unnamed: 0,Position
2,SG
3,SG
4,PF


In [145]:
nba[['Position']].loc[[4]]  #same thing can be done this way as well
#or nba.loc[[4]][['Position']]

Unnamed: 0,Position
4,PF


In [150]:
nba.Position

0      PG
1      SF
2      SG
3      SG
4      PF
       ..
452    PF
453    PG
454    PG
455     C
456     C
Name: Position, Length: 457, dtype: object

In [152]:
nba.Position.unique()  
#gives you the different types of data values without repetition

array(['PG', 'SF', 'SG', 'PF', 'C'], dtype=object)

In [153]:
list(nba.Name)

['Avery Bradley',
 'Jae Crowder',
 'John Holland',
 'R.J. Hunter',
 'Jonas Jerebko',
 'Amir Johnson',
 'Jordan Mickey',
 'Kelly Olynyk',
 'Terry Rozier',
 'Marcus Smart',
 'Jared Sullinger',
 'Isaiah Thomas',
 'Evan Turner',
 'James Young',
 'Tyler Zeller',
 'Bojan Bogdanovic',
 'Markel Brown',
 'Wayne Ellington',
 'Rondae Hollis-Jefferson',
 'Jarrett Jack',
 'Sergey Karasev',
 'Sean Kilpatrick',
 'Shane Larkin',
 'Brook Lopez',
 'Chris McCullough',
 'Willie Reed',
 'Thomas Robinson',
 'Henry Sims',
 'Donald Sloan',
 'Thaddeus Young',
 'Arron Afflalo',
 'Lou Amundson',
 'Thanasis Antetokounmpo',
 'Carmelo Anthony',
 'Jose Calderon',
 'Cleanthony Early',
 'Langston Galloway',
 'Jerian Grant',
 'Robin Lopez',
 "Kyle O'Quinn",
 'Kristaps Porzingis',
 'Kevin Seraphin',
 'Lance Thomas',
 'Sasha Vujacic',
 'Derrick Williams',
 'Tony Wroten',
 'Elton Brand',
 'Isaiah Canaan',
 'Robert Covington',
 'Joel Embiid',
 'Jerami Grant',
 'Richaun Holmes',
 'Carl Landry',
 'Kendall Marshall',
 'T.J. M

In [154]:
list(nba.College.unique())

['Texas',
 'Marquette',
 'Boston University',
 'Georgia State',
 nan,
 'LSU',
 'Gonzaga',
 'Louisville',
 'Oklahoma State',
 'Ohio State',
 'Washington',
 'Kentucky',
 'North Carolina',
 'Arizona',
 'Georgia Tech',
 'Cincinnati',
 'Miami (FL)',
 'Stanford',
 'Syracuse',
 'Saint Louis',
 'Kansas',
 'Georgetown',
 'Texas A&M',
 'UCLA',
 'UNLV',
 'Wichita State',
 "Saint Joseph's",
 'Notre Dame',
 'Norfolk State',
 'Duke',
 'Murray State',
 'Tennessee State',
 'Bowling Green',
 'Purdue',
 'Wake Forest',
 'Michigan',
 'Missouri',
 'USC',
 'Villanova',
 'Rider',
 'Utah',
 'Belmont',
 'Davidson',
 'Vanderbilt',
 'Michigan State',
 'Florida',
 'Washington State',
 'Arizona State',
 'Oklahoma',
 'Wyoming',
 "St. John's",
 'Maryland',
 'Wisconsin',
 'Utah Valley',
 'North Carolina State',
 'UC Santa Barbara',
 'Baylor',
 'Connecticut',
 'Oregon State',
 'New Mexico',
 'Oregon',
 'Creighton',
 'Arkansas',
 'Memphis',
 "Saint Mary's",
 'Tennessee',
 'Alabama',
 'Georgia',
 'Colorado',
 'Boston Co

In [157]:
nbc=nba.copy()
nbc

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0
...,...,...,...,...,...,...,...,...,...
452,Trey Lyles,Utah Jazz,41.0,PF,20.0,6-10,234.0,Kentucky,2239800.0
453,Shelvin Mack,Utah Jazz,8.0,PG,26.0,6-3,203.0,Butler,2433333.0
454,Raul Neto,Utah Jazz,25.0,PG,24.0,6-1,179.0,,900000.0
455,Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0


In [166]:
nbc.sort_values(by= ['Name']) 
#sorting dataframe alphabetically on the basis of name
#see differnt types of sorting ways and methods in function explanation

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
152,Aaron Brooks,Chicago Bulls,0.0,PG,31.0,6-0,161.0,Oregon,2250000.0
356,Aaron Gordon,Orlando Magic,0.0,PF,20.0,6-9,220.0,Arizona,4171680.0
328,Aaron Harrison,Charlotte Hornets,9.0,SG,21.0,6-6,210.0,Kentucky,525093.0
404,Adreian Payne,Minnesota Timberwolves,33.0,PF,25.0,6-10,237.0,Michigan State,1938840.0
312,Al Horford,Atlanta Hawks,15.0,C,30.0,6-10,245.0,Florida,12000000.0
...,...,...,...,...,...,...,...,...,...
386,Wilson Chandler,Denver Nuggets,21.0,SF,29.0,6-8,225.0,DePaul,10449438.0
270,Xavier Munford,Memphis Grizzlies,14.0,PG,24.0,6-3,180.0,Rhode Island,
402,Zach LaVine,Minnesota Timberwolves,8.0,PG,21.0,6-5,189.0,UCLA,2148360.0
271,Zach Randolph,Memphis Grizzlies,50.0,PF,34.0,6-9,260.0,Michigan State,9638555.0


nbc.sort_values(
    by: 'IndexLabel',
    *,
    axis: 'Axis' = 0,
    ascending: 'bool | list[bool] | tuple[bool, ...]' = True,
    inplace: 'bool' = False,
    kind: 'str' = 'quicksort',
    na_position: 'str' = 'last',
    ignore_index: 'bool' = False,
    key: 'ValueKeyFunc' = None,
) -> 'DataFrame | None'
Docstring:
Sort by the values along either axis.

Parameters
----------
by : str or list of str
    Name or list of names to sort by.

    - if `axis` is 0 or `'index'` then `by` may contain index
      levels and/or column labels.
    - if `axis` is 1 or `'columns'` then `by` may contain column
      levels and/or index labels.
axis : {0 or 'index', 1 or 'columns'}, default 0
     Axis to be sorted.
ascending : bool or list of bool, default True
     Sort ascending vs. descending. Specify list for multiple sort
     orders.  If this is a list of bools, must match the length of
     the by.
inplace : bool, default False
     If True, perform operation in-place.
kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, default 'quicksort'
     Choice of sorting algorithm. See also :func:`numpy.sort` for more
     information. `mergesort` and `stable` are the only stable algorithms. For
     DataFrames, this option is only applied when sorting on a single
     column or label.
na_position : {'first', 'last'}, default 'last'
     Puts NaNs at the beginning if `first`; `last` puts NaNs at the
     end.
ignore_index : bool, default False
     If True, the resulting axis will be labeled 0, 1, …, n - 1.
key : callable, optional
    Apply the key function to the values
    before sorting. This is similar to the `key` argument in the
    builtin :meth:`sorted` function, with the notable difference that
    this `key` function should be *vectorized*. It should expect a
    ``Series`` and return a Series with the same shape as the input.
    It will be applied to each column in `by` independently.

In [168]:
nbc.sort_index?
#sorts on basis of index values. can also sort columns using axis=0

### TOPIC SPECIFIC OPERATIONS ON DATAFRAME

In [169]:
nbc

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0
...,...,...,...,...,...,...,...,...,...
452,Trey Lyles,Utah Jazz,41.0,PF,20.0,6-10,234.0,Kentucky,2239800.0
453,Shelvin Mack,Utah Jazz,8.0,PG,26.0,6-3,203.0,Butler,2433333.0
454,Raul Neto,Utah Jazz,25.0,PG,24.0,6-1,179.0,,900000.0
455,Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0


In [170]:
#eg: if you want to split name column into two columns based on first name 
#and last name
new = nbc['Name'].split(" ")  
#DOESN"T work because you can't split a series 
new

AttributeError: 'Series' object has no attribute 'split'

In [171]:
#instead
new = nbc['Name'].str.split(" ") 
#therefore applying split method over every string value of the column 
#series
new

0      [Avery, Bradley]
1        [Jae, Crowder]
2       [John, Holland]
3        [R.J., Hunter]
4      [Jonas, Jerebko]
             ...       
452       [Trey, Lyles]
453     [Shelvin, Mack]
454        [Raul, Neto]
455     [Tibor, Pleiss]
456      [Jeff, Withey]
Name: Name, Length: 457, dtype: object

In [172]:
new = nbc['Name'].str.split(" ", expand = True) 
#this argument gives output after splitting into diff columns in the 
#dataframe
new
#getting 5 columns here because of middle names or more than 1 space in between 

Unnamed: 0,0,1,2,3,4
0,Avery,Bradley,,,
1,Jae,Crowder,,,
2,John,Holland,,,
3,R.J.,Hunter,,,
4,Jonas,Jerebko,,,
...,...,...,...,...,...
452,Trey,Lyles,,,
453,Shelvin,Mack,,,
454,Raul,Neto,,,
455,Tibor,Pleiss,,,


In [178]:
#if you only want first name and rest in one or two column, ie you want to
#dictate the number of parts the string in the series is splittled into, use:
new = nbc['Name'].str.split(" ", n=1, expand = True) 

In [179]:
new

Unnamed: 0,0,1
0,Avery,Bradley
1,Jae,Crowder
2,John,Holland
3,R.J.,Hunter
4,Jonas,Jerebko
...,...,...
452,Trey,Lyles
453,Shelvin,Mack
454,Raul,Neto
455,Tibor,Pleiss


In [193]:
new[0]

0        Avery
1          Jae
2         John
3         R.J.
4        Jonas
        ...   
452       Trey
453    Shelvin
454       Raul
455      Tibor
456       Jeff
Name: 0, Length: 457, dtype: object

In [177]:
nbc

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0
...,...,...,...,...,...,...,...,...,...
452,Trey Lyles,Utah Jazz,41.0,PF,20.0,6-10,234.0,Kentucky,2239800.0
453,Shelvin Mack,Utah Jazz,8.0,PG,26.0,6-3,203.0,Butler,2433333.0
454,Raul Neto,Utah Jazz,25.0,PG,24.0,6-1,179.0,,900000.0
455,Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0


In [180]:
nbc['First Name'] = new[0]
nbc['Last Name'] = new[1]
nbc

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary,First Name,Last Name
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0,Avery,Bradley
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0,Jae,Crowder
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,,John,Holland
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0,R.J.,Hunter
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0,Jonas,Jerebko
...,...,...,...,...,...,...,...,...,...,...,...
452,Trey Lyles,Utah Jazz,41.0,PF,20.0,6-10,234.0,Kentucky,2239800.0,Trey,Lyles
453,Shelvin Mack,Utah Jazz,8.0,PG,26.0,6-3,203.0,Butler,2433333.0,Shelvin,Mack
454,Raul Neto,Utah Jazz,25.0,PG,24.0,6-1,179.0,,900000.0,Raul,Neto
455,Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0,Tibor,Pleiss


In [181]:
n

Unnamed: 0_level_0,Team,Number,Position,Age,Height,Weight,College,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0
...,...,...,...,...,...,...,...,...
Trey Lyles,Utah Jazz,41.0,PF,20.0,6-10,234.0,Kentucky,2239800.0
Shelvin Mack,Utah Jazz,8.0,PG,26.0,6-3,203.0,Butler,2433333.0
Raul Neto,Utah Jazz,25.0,PG,24.0,6-1,179.0,,900000.0
Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0


In [194]:
nbc['Name']

0      Avery Bradley
1        Jae Crowder
2       John Holland
3        R.J. Hunter
4      Jonas Jerebko
           ...      
452       Trey Lyles
453     Shelvin Mack
454        Raul Neto
455     Tibor Pleiss
456      Jeff Withey
Name: Name, Length: 457, dtype: object

In [196]:
n['First Name'] = nbc['Position']
n

Unnamed: 0_level_0,Team,Number,Position,Age,Height,Weight,College,Salary,First Name
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0,
Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0,
John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,,
R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0,
Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0,
...,...,...,...,...,...,...,...,...,...
Trey Lyles,Utah Jazz,41.0,PF,20.0,6-10,234.0,Kentucky,2239800.0,
Shelvin Mack,Utah Jazz,8.0,PG,26.0,6-3,203.0,Butler,2433333.0,
Raul Neto,Utah Jazz,25.0,PG,24.0,6-1,179.0,,900000.0,
Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0,


In [198]:
nb_num['First Name'] = new[0]
nb_num
#NOTE THAT HERE IN SECOND ROW JAE CROWDER GETS FIRST NAME ASSIGNED AS LUC 
#AND NOT JAE. Similarly for all others except the first one avery with index 
#number and number both as 0.
#WHY AN WORKING IS EXPLAINED BELOW

Unnamed: 0_level_0,Name,Team,Number,Position,Age,Height,Weight,College,Salary,First Name
Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0.0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0,Avery
99.0,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0,Luc
30.0,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,,Arron
28.0,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0,Donald
8.0,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0,Terry
...,...,...,...,...,...,...,...,...,...,...
41.0,Trey Lyles,Utah Jazz,41.0,PF,20.0,6-10,234.0,Kentucky,2239800.0,Kevin
8.0,Shelvin Mack,Utah Jazz,8.0,PG,26.0,6-3,203.0,Butler,2433333.0,Terry
25.0,Raul Neto,Utah Jazz,25.0,PG,24.0,6-1,179.0,,900000.0,Willie
21.0,Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0,Sean


In [187]:
nb_num2['First Name'] = new[0]
nb_num2

Unnamed: 0_level_0,Name,Team,Position,Age,Height,Weight,College,Salary,First Name
Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0.0,Avery Bradley,Boston Celtics,PG,25.0,6-2,180.0,Texas,7730337.0,Avery
99.0,Jae Crowder,Boston Celtics,SF,25.0,6-6,235.0,Marquette,6796117.0,Luc
30.0,John Holland,Boston Celtics,SG,27.0,6-5,205.0,Boston University,,Arron
28.0,R.J. Hunter,Boston Celtics,SG,22.0,6-5,185.0,Georgia State,1148640.0,Donald
8.0,Jonas Jerebko,Boston Celtics,PF,29.0,6-10,231.0,,5000000.0,Terry
...,...,...,...,...,...,...,...,...,...
41.0,Trey Lyles,Utah Jazz,PF,20.0,6-10,234.0,Kentucky,2239800.0,Kevin
8.0,Shelvin Mack,Utah Jazz,PG,26.0,6-3,203.0,Butler,2433333.0,Terry
25.0,Raul Neto,Utah Jazz,PG,24.0,6-1,179.0,,900000.0,Willie
21.0,Tibor Pleiss,Utah Jazz,C,26.0,7-3,256.0,,2900000.0,Sean


In [188]:
ncN=nc.set_index('Name', drop=False)
ncN

Unnamed: 0_level_0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Avery Bradley,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
Jae Crowder,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
John Holland,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
R.J. Hunter,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
Jonas Jerebko,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0
...,...,...,...,...,...,...,...,...,...
Trey Lyles,Trey Lyles,Utah Jazz,41.0,PF,20.0,6-10,234.0,Kentucky,2239800.0
Shelvin Mack,Shelvin Mack,Utah Jazz,8.0,PG,26.0,6-3,203.0,Butler,2433333.0
Raul Neto,Raul Neto,Utah Jazz,25.0,PG,24.0,6-1,179.0,,900000.0
Tibor Pleiss,Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0


In [189]:
ncN['First Name'] = new[0]
ncN

Unnamed: 0_level_0,Name,Team,Number,Position,Age,Height,Weight,College,Salary,First Name
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Avery Bradley,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0,
Jae Crowder,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0,
John Holland,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,,
R.J. Hunter,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0,
Jonas Jerebko,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0,
...,...,...,...,...,...,...,...,...,...,...
Trey Lyles,Trey Lyles,Utah Jazz,41.0,PF,20.0,6-10,234.0,Kentucky,2239800.0,
Shelvin Mack,Shelvin Mack,Utah Jazz,8.0,PG,26.0,6-3,203.0,Butler,2433333.0,
Raul Neto,Raul Neto,Utah Jazz,25.0,PG,24.0,6-1,179.0,,900000.0,
Tibor Pleiss,Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0,


### AFTER A LOT OF GUESSING, 
THIS IS THE EXPLANATION.

HOW DOES THIS COLUMN ADDITION TO DATAFRAME WORK?
--> IT WORKS BASED ON MAPPING OF INDEX VALUES. THIS IS WHY ABOVE, THE MISTAKE WAS MADE FOR NUMBER INDEXED DATAFRAME. IT MAPPED THE FIRST NAMES(IE COLUMN 0) OF 'new' DATAFRAME ON THE BASIS OF NUMBER COLUMN VALUES WRT INDEX COLUMN OF NEW DATA FRAME. AND THIS IS ALSO WHY NO MATTER HOW THE NAME INDEXED DATAFRAME LOOKED WITH THE NAME SERIES DROPPED OR WITHOUT, THE ADDITIONAL FIRST NAME COLUMN WHEN ADDED ALWAYS SHOWED NaN. BECAUSE NAMES COULD NOT BE MAPPED TO THE INDEX NUMBERS OF new DATAFRAME. 
ANY NEW COLUMN WHEN ADDED FROM ONE DATAFRAME TO ANOTHER, IS MAPPED ON BASIS OF INDEX VALUES OF BOTH DATAFRAMES. IF INDEX COLUMNS DO NOT MATCH, MAPPING GOES WRONG AND THE PROCESS FAILS.

### THIS MAPPING COMES VERY HANDY WHEN MERGING DIFFERENT SETS OF DATAFRAMES WITH AT LEAST ONE COMMON COLUMN. WE CAN EASILY MAKE THAT INDEX FOR BOTH AND MERGE DIFFERENTLY ORGANIZED / ORDERED DATA FRAMES INTO ONE SINGLE DATAFRAME CONTAINING THE ENTIRE DATA.

In [199]:
#how it went wrong?
nb_num.loc[[30]]  
#all have first name as Arron. since index no. 30 in new dataset was Arron.

Unnamed: 0_level_0,Name,Team,Number,Position,Age,Height,Weight,College,Salary,First Name
Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
30.0,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,,Arron
30.0,Thaddeus Young,Brooklyn Nets,30.0,PF,27.0,6-8,221.0,Georgia Tech,11235955.0,Arron
30.0,Stephen Curry,Golden State Warriors,30.0,PG,28.0,6-3,190.0,Davidson,11370786.0,Arron
30.0,C.J. Wilcox,Los Angeles Clippers,30.0,SG,25.0,6-5,195.0,Washington,1159680.0,Arron
30.0,Julius Randle,Los Angeles Lakers,30.0,PF,21.0,6-9,250.0,Kentucky,3132240.0,Arron
...,...,...,...,...,...,...,...,...,...,...
30.0,Seth Curry,Sacramento Kings,30.0,SG,25.0,6-2,185.0,Duke,947276.0,Arron
30.0,Dahntay Jones,Cleveland Cavaliers,30.0,SG,35.0,6-6,225.0,Duke,,Arron
30.0,Norris Cole,New Orleans Pelicans,30.0,PG,27.0,6-2,175.0,Cleveland State,3036927.0,Arron
30.0,David West,San Antonio Spurs,30.0,PF,35.0,6-9,250.0,Xavier,1499187.0,Arron


In [200]:
#by default, columns are added at the end/ appended.
#to interchange places:
nbc

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary,First Name,Last Name
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0,Avery,Bradley
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0,Jae,Crowder
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,,John,Holland
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0,R.J.,Hunter
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0,Jonas,Jerebko
...,...,...,...,...,...,...,...,...,...,...,...
452,Trey Lyles,Utah Jazz,41.0,PF,20.0,6-10,234.0,Kentucky,2239800.0,Trey,Lyles
453,Shelvin Mack,Utah Jazz,8.0,PG,26.0,6-3,203.0,Butler,2433333.0,Shelvin,Mack
454,Raul Neto,Utah Jazz,25.0,PG,24.0,6-1,179.0,,900000.0,Raul,Neto
455,Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0,Tibor,Pleiss


In [201]:
nbc.columns #copy list of columns then arrange them however you want it.

Index(['Name', 'Team', 'Number', 'Position', 'Age', 'Height', 'Weight',
       'College', 'Salary', 'First Name', 'Last Name'],
      dtype='object')

In [208]:
nbc=nbc[['Name', 'First Name', 'Last Name', 'Team', 'Number', 'Position', 'Age', 'Height', 'Weight','College', 'Salary']]
nbc

Unnamed: 0,Name,First Name,Last Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Avery,Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Jae,Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,John,Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
3,R.J. Hunter,R.J.,Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Jonas,Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0
...,...,...,...,...,...,...,...,...,...,...,...
452,Trey Lyles,Trey,Lyles,Utah Jazz,41.0,PF,20.0,6-10,234.0,Kentucky,2239800.0
453,Shelvin Mack,Shelvin,Mack,Utah Jazz,8.0,PG,26.0,6-3,203.0,Butler,2433333.0
454,Raul Neto,Raul,Neto,Utah Jazz,25.0,PG,24.0,6-1,179.0,,900000.0
455,Tibor Pleiss,Tibor,Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0


In [209]:
#another method to drop(remove) a column without setting it to index
nbc.drop(columns='First Name', inplace= True)
nbc

Unnamed: 0,Name,Last Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
3,R.J. Hunter,Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0
...,...,...,...,...,...,...,...,...,...,...
452,Trey Lyles,Lyles,Utah Jazz,41.0,PF,20.0,6-10,234.0,Kentucky,2239800.0
453,Shelvin Mack,Mack,Utah Jazz,8.0,PG,26.0,6-3,203.0,Butler,2433333.0
454,Raul Neto,Neto,Utah Jazz,25.0,PG,24.0,6-1,179.0,,900000.0
455,Tibor Pleiss,Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0


#### another method to insert columns wherever you want to and not at the end

nbc.insert?
nbc.insert(
    loc: 'int',
    column: 'Hashable',
    value: 'Scalar | AnyArrayLike',
    allow_duplicates: 'bool | lib.NoDefault' = <no_default>,
) -> 'None'

if allow_duplicates is set to false it won't allow a column with same name to be added to the dataframe. if set to true, it alows same name column to be added.

In [211]:
nbc.insert(1, 'First Name', new[0])

In [212]:
nbc

Unnamed: 0,Name,First Name,Last Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Avery,Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Jae,Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,John,Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
3,R.J. Hunter,R.J.,Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Jonas,Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0
...,...,...,...,...,...,...,...,...,...,...,...
452,Trey Lyles,Trey,Lyles,Utah Jazz,41.0,PF,20.0,6-10,234.0,Kentucky,2239800.0
453,Shelvin Mack,Shelvin,Mack,Utah Jazz,8.0,PG,26.0,6-3,203.0,Butler,2433333.0
454,Raul Neto,Raul,Neto,Utah Jazz,25.0,PG,24.0,6-1,179.0,,900000.0
455,Tibor Pleiss,Tibor,Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0


nbc.drop?
nbc.drop(
    labels: 'IndexLabel' = None,
    *,
    axis: 'Axis' = 0,
    index: 'IndexLabel' = None,
    columns: 'IndexLabel' = None,
    level: 'Level' = None,
    inplace: 'bool' = False,
    errors: 'IgnoreRaise' = 'raise',
) -> 'DataFrame | None'

by default, axis =0, for axis =1, you can write index values as well that you want to drop

In [216]:
a= 1,2,3,4
#automatically creates a tuple.
a

(1, 2, 3, 4)

### Combining Data in Pandas

In [217]:
f= 1,2,5,6,3,7,11,0,4
s= 5,3,2,1,3,9,21,3,1
fs=pd.Series(f)
ss=pd.Series(s)

In [218]:
fs

0     1
1     2
2     5
3     6
4     3
5     7
6    11
7     0
8     4
dtype: int64

In [219]:
ss

0     5
1     3
2     2
3     1
4     3
5     9
6    21
7     3
8     1
dtype: int64

In [220]:
#to combine:
r= fs.combine(ss, lambda x1,x2: x1 if x1>x2 else x2)
r  
#index-wise comparison on the basis of lambda if condition to create a new series

0     5
1     3
2     5
3     6
4     3
5     9
6    21
7     3
8     4
dtype: int64

In [221]:
r1= fs.combine(ss, lambda x1,x2: x1+x2)
r1
#NOTE: SEQUENCE MATTERS HERE. x1 WILL BE FROM THE SERIES BEFORE .combine 
#AND x2 FROM THE SERIES INSIDE BRACKET.

0     6
1     5
2     7
3     7
4     6
5    16
6    32
7     3
8     5
dtype: int64

In [222]:
names=[]
maths=[]
phy=[]
chem=[]
bio=[]
eng=[]
for i in range(10):
    names.append(str(input('Enter name: ')))
    maths.append(float(input('Enter maths marks: ')))
    phy.append(float(input('Enter physics marks: ')))
    chem.append(float(input('Enter chemistry marks: ')))
    bio.append(float(input('Enter bio marks: ')))
    eng.append(float(input('Enter english marks: ')))
rec = {'Names': names, 'Maths': maths, 'Physics': phy, 'Chemistry': chem, 'Biology': bio, 'English': eng}
report= pd.DataFrame(rec)
report

#columns to be added after calculation: Total Marks, Total %, Best 4 %, Grade
#rows to be added after calculation: Class Average, Max score, Min score 

Enter name: Ishaan
Enter maths marks: 83
Enter physics marks: 90
Enter chemistry marks: 85
Enter bio marks: 91
Enter english marks: 93
Enter name: Mahi
Enter maths marks: 45
Enter physics marks: 52
Enter chemistry marks: 49
Enter bio marks: 60
Enter english marks: 58
Enter name: Piya
Enter maths marks: 43
Enter physics marks: 39
Enter chemistry marks: 50
Enter bio marks: 44
Enter english marks: 39
Enter name: Shanya
Enter maths marks: 95
Enter physics marks: 97
Enter chemistry marks: 92
Enter bio marks: 95
Enter english marks: 92
Enter name: Rohan
Enter maths marks: 44
Enter physics marks: 43
Enter chemistry marks: 53
Enter bio marks: 43
Enter english marks: 48
Enter name: Karan
Enter maths marks: 87
Enter physics marks: 86
Enter chemistry marks: 84
Enter bio marks: 80
Enter english marks: 81
Enter name: Kiara
Enter maths marks: 98
Enter physics marks: 97
Enter chemistry marks: 100
Enter bio marks: 95
Enter english marks: 98
Enter name: Mayank
Enter maths marks: 95
Enter physics marks:

Unnamed: 0,Names,Maths,Physics,Chemistry,Biology,English
0,Ishaan,83.0,90.0,85.0,91.0,93.0
1,Mahi,45.0,52.0,49.0,60.0,58.0
2,Piya,43.0,39.0,50.0,44.0,39.0
3,Shanya,95.0,97.0,92.0,95.0,92.0
4,Rohan,44.0,43.0,53.0,43.0,48.0
5,Karan,87.0,86.0,84.0,80.0,81.0
6,Kiara,98.0,97.0,100.0,95.0,98.0
7,Mayank,95.0,92.0,90.0,94.0,93.0
8,Anurag,75.0,82.0,84.0,0.0,85.0
9,Mehar,67.0,77.0,75.0,79.0,84.0


In [227]:
list(report['Maths']) #accessing data to use for operations

[83.0, 45.0, 43.0, 95.0, 44.0, 87.0, 98.0, 95.0, 75.0, 67.0]

In [None]:
#solution
total_marks=[]
for i in range(10):
    total_marks.append(

In [225]:
float(report.loc[1][['Maths']])

  float(report.loc[1][['Maths']])


45.0

### Concatenate Datasets

In [236]:
n1 = nba.loc[45:47].drop(columns = ['Number', 'Weight'])
n2 = nba.loc[132:135]
n3 = nba.tail(2)

In [237]:
n1

Unnamed: 0,Name,Team,Position,Age,Height,College,Salary
45,Tony Wroten,New York Knicks,SG,23.0,6-6,Washington,167406.0
46,Elton Brand,Philadelphia 76ers,PF,37.0,6-9,Duke,
47,Isaiah Canaan,Philadelphia 76ers,PG,25.0,6-0,Murray State,947276.0


In [238]:
n2

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
132,Mirza Teletovic,Phoenix Suns,35.0,PF,30.0,6-9,242.0,,5500000.0
133,P.J. Tucker,Phoenix Suns,17.0,SF,31.0,6-6,245.0,Texas,5500000.0
134,T.J. Warren,Phoenix Suns,12.0,SF,22.0,6-8,230.0,North Carolina State,2041080.0
135,Alan Williams,Phoenix Suns,15.0,C,23.0,6-8,260.0,UC Santa Barbara,83397.0


In [239]:
n3

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
455,Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0
456,Jeff Withey,Utah Jazz,24.0,C,26.0,7-0,231.0,Kansas,947276.0


In [240]:
frame = [n1,n3,n2]
n132 = pd.concat(frame)
n132
#OBSERVE how number and weight columns are at the end because n1 was the 
#first in order of concatenation which did not have these columns

Unnamed: 0,Name,Team,Position,Age,Height,College,Salary,Number,Weight
45,Tony Wroten,New York Knicks,SG,23.0,6-6,Washington,167406.0,,
46,Elton Brand,Philadelphia 76ers,PF,37.0,6-9,Duke,,,
47,Isaiah Canaan,Philadelphia 76ers,PG,25.0,6-0,Murray State,947276.0,,
455,Tibor Pleiss,Utah Jazz,C,26.0,7-3,,2900000.0,21.0,256.0
456,Jeff Withey,Utah Jazz,C,26.0,7-0,Kansas,947276.0,24.0,231.0
132,Mirza Teletovic,Phoenix Suns,PF,30.0,6-9,,5500000.0,35.0,242.0
133,P.J. Tucker,Phoenix Suns,SF,31.0,6-6,Texas,5500000.0,17.0,245.0
134,T.J. Warren,Phoenix Suns,SF,22.0,6-8,North Carolina State,2041080.0,12.0,230.0
135,Alan Williams,Phoenix Suns,C,23.0,6-8,UC Santa Barbara,83397.0,15.0,260.0


In [242]:
frame1= [n3,n2,n1]
n321 = pd.concat(frame1)
n321

#here columns are arragned according to n3 since it is first in order

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
455,Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0
456,Jeff Withey,Utah Jazz,24.0,C,26.0,7-0,231.0,Kansas,947276.0
132,Mirza Teletovic,Phoenix Suns,35.0,PF,30.0,6-9,242.0,,5500000.0
133,P.J. Tucker,Phoenix Suns,17.0,SF,31.0,6-6,245.0,Texas,5500000.0
134,T.J. Warren,Phoenix Suns,12.0,SF,22.0,6-8,230.0,North Carolina State,2041080.0
135,Alan Williams,Phoenix Suns,15.0,C,23.0,6-8,260.0,UC Santa Barbara,83397.0
45,Tony Wroten,New York Knicks,,SG,23.0,6-6,,Washington,167406.0
46,Elton Brand,Philadelphia 76ers,,PF,37.0,6-9,,Duke,
47,Isaiah Canaan,Philadelphia 76ers,,PG,25.0,6-0,,Murray State,947276.0


In [247]:
#another way is appending
n12 = n1.append(n2)  
#shows: 'DataFrame' object has no attribute 'append'
#but if it works, it can only concatenate 2 datasets.

AttributeError: 'DataFrame' object has no attribute 'append'

maths topics:
calculus: integration and differentiation
probability 
statistics
