# Working With CSV Files

In [2]:
import pandas as pd
# Load the CSV into a DataFrame; the relative path is resolved against the
# kernel's current working directory.
df = pd.read_csv('Students.csv')
# Preview only the first rows (head() defaults to 5) rather than the whole frame.
df.head()

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0


# index.array:
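`index.array` returns the pandas array that backs the index of a Series or DataFrame. The result is a pandas ExtensionArray (displayed below as a `PandasArray`, a thin wrapper around a NumPy array), not a plain NumPy array; call `df.index.to_numpy()` when an actual NumPy `ndarray` is needed.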

In [3]:
# .array exposes the index values as a pandas array object (displayed as
# <PandasArray> in the output), not as a plain NumPy ndarray.
df.index.array

<PandasArray>
[  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,
 ...
 448, 449, 450, 451, 452, 453, 454, 455, 456, 457]
Length: 458, dtype: int64

# to_numpy():
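The `to_numpy()` method converts a pandas Series or DataFrame into a NumPy array. When the columns have mixed types (strings and floats, as in this dataset), the resulting array has dtype `object`.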

In [4]:
# Convert the whole frame to a single 2-D NumPy array. The columns mix strings
# and floats, so the result falls back to dtype=object (see the output).
df.to_numpy()

array([['Avery Bradley', 'Boston Celtics', 0.0, ..., 180.0, 'Texas',
        7730337.0],
       ['Jae Crowder', 'Boston Celtics', 99.0, ..., 235.0, 'Marquette',
        6796117.0],
       ['John Holland', 'Boston Celtics', 30.0, ..., 205.0,
        'Boston University', nan],
       ...,
       ['Tibor Pleiss', 'Utah Jazz', 21.0, ..., 256.0, nan, 2900000.0],
       ['Jeff Withey', 'Utah Jazz', 24.0, ..., 231.0, 'Kansas', 947276.0],
       [nan, nan, nan, ..., nan, nan, nan]], dtype=object)

# np.array(dataframe):
The np.array() function from the NumPy library can be used to create a NumPy array from a pandas DataFrame.

In [5]:
import numpy as np
# Passing the DataFrame to np.array() yields the same object-dtype array that
# df.to_numpy() displayed in the previous section.
np.array(df)

array([['Avery Bradley', 'Boston Celtics', 0.0, ..., 180.0, 'Texas',
        7730337.0],
       ['Jae Crowder', 'Boston Celtics', 99.0, ..., 235.0, 'Marquette',
        6796117.0],
       ['John Holland', 'Boston Celtics', 30.0, ..., 205.0,
        'Boston University', nan],
       ...,
       ['Tibor Pleiss', 'Utah Jazz', 21.0, ..., 256.0, nan, 2900000.0],
       ['Jeff Withey', 'Utah Jazz', 24.0, ..., 231.0, 'Kansas', 947276.0],
       [nan, nan, nan, ..., nan, nan, nan]], dtype=object)

# sort_index():
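The `sort_index()` method sorts a pandas Series or DataFrame by its index labels and returns a new, sorted object (the original is left unchanged unless `inplace=True` is passed). With `ascending=False`, as in the example below, the rows come back in descending index order.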

In [8]:
# Reverse the row order by sorting on the index labels, highest label first.
# sort_index returns a new frame here; `df` itself is not modified.
df.sort_index(axis='index', ascending=False)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
457,,,,,,,,,
456,Jeff Withey,Utah Jazz,24.0,C,26.0,7-0,231.0,Kansas,947276.0
455,Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0
454,Raul Neto,Utah Jazz,25.0,PG,24.0,6-1,179.0,,900000.0
453,Shelvin Mack,Utah Jazz,8.0,PG,26.0,6-3,203.0,Butler,2433333.0
...,...,...,...,...,...,...,...,...,...
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0


# loc and iloc:
`loc` selects rows and columns of a DataFrame by their labels, while `iloc` selects them by their integer position (counting from 0).

In [10]:
import pandas as pd

# Column-oriented input: each key becomes a column, each list its values.
data = {
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Age': [25, 30, 22],
}

# String row labels (instead of the default 0..n-1) make the difference
# between label-based and position-based lookups visible.
df = pd.DataFrame(data=data, index=['row1', 'row2', 'row3'])

# Bare last expression so the notebook renders the frame.
df

Unnamed: 0,Name,Age
row1,Alice,25
row2,Bob,30
row3,Charlie,22


In [11]:
# Whole-row lookups return a Series: .loc takes the index label,
# .iloc takes the 0-based row position.
row_by_label = df.loc['row1']
row_by_position = df.iloc[1]
print(row_by_label)
print(row_by_position)

# Single-cell lookups take (row, column): labels with .loc, positions with .iloc.
name_by_label = df.loc['row2', 'Name']
age_by_position = df.iloc[0, 1]
print(name_by_label)
print(age_by_position)


Name    Alice
Age        25
Name: row1, dtype: object
Name    Bob
Age      30
Name: row2, dtype: object
Bob
25
