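#In pandas, selecting data from a DataFrame means specifying which columns and/or rows you want to work with.
#You can use several methods for selection, including bracket indexing (df[...]), .loc (label-based), .iloc (position-based), and boolean indexing.
#Below are detailed examples of how to select columns and rows in different ways, followed by examples of adding, updating, and dropping columns, changing the index, and column arithmetic.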

In [1]:
import pandas as pd
# Sample DataFrame: three integer columns on the default 0..4 row index
df = pd.DataFrame(dict(
    A=[1, 2, 2, 4, 5],
    B=[5, 6, 6, 8, 9],
    C=[10, 11, 11, 13, 14],
))
# Bracket indexing with a single label returns that column as a Series
col_A = df['A']
print(col_A)

0    1
1    2
2    2
3    4
4    5
Name: A, dtype: int64


In [2]:
# Bracket indexing with a list of labels returns a DataFrame holding just those columns
wanted_columns = ['A', 'B']
cols_AB = df[wanted_columns]
print(cols_AB)

   A  B
0  1  5
1  2  6
2  2  6
3  4  8
4  5  9


In [3]:
# Positional row slice: .iloc treats 1:3 as half-open, so the rows at positions 1 and 2 are returned
rows_1_3 = df.iloc[1:3, :]
print(rows_1_3)

   A  B   C
1  2  6  11
2  2  6  11


In [4]:
# Make column 'A' the row index so rows can be addressed by its values.
# Note: this rebinds df, so running the cell a second time fails ('A' is no longer a column).
df = df.set_index('A')
# Label slice: unlike .iloc, .loc includes both endpoints, and every row labelled 2 is kept
rows_2_4 = df.loc[2:4]
print(rows_2_4)

   B   C
A       
2  6  11
2  6  11
4  8  13


In [5]:
# Move 'A' back into a regular column and restore the default 0..n-1 index
df = df.reset_index()
# .loc takes a row selector and a column selector; the label slice 1:3 includes row 3
subset_loc = df.loc[1:3, ['A', 'C']]
print(subset_loc)

   A   C
1  2  11
2  2  11
3  4  13


In [6]:
# .iloc works purely by position: rows 1-2 (stop excluded) and the columns at positions 0 and 2
row_positions = slice(1, 3)
column_positions = [0, 2]
subset_iloc = df.iloc[row_positions, column_positions]
print(subset_iloc)

   A   C
1  2  11
2  2  11


In [7]:
# Boolean indexing: build a True/False mask from column 'A' and keep the rows where it is True
a_above_2 = df['A'] > 2
rows_condition = df[a_above_2]
print(rows_condition)

   A  B   C
3  4  8  13
4  5  9  14


In [8]:
# Two element-wise conditions combined with &: a row is kept only when both masks are True
a_above_2 = df['A'] > 2
b_below_9 = df['B'] < 9
rows_multiple_conditions = df[a_above_2 & b_below_9]
print(rows_multiple_conditions)

   A  B   C
3  4  8  13


In [9]:
import pandas as pd
# Rebuild the sample frame so this example starts from known data
df = pd.DataFrame(dict(
    A=[1, 2, 2, 4, 5],
    B=[5, 6, 6, 8, 9],
    C=[10, 11, 11, 13, 14],
))
# Keep only the rows whose 'A' value exceeds 2 (boolean mask passed to .loc)
df_A_gt_2 = df.loc[df['A'] > 2]
print("Rows where 'A' > 2:")
print(df_A_gt_2)

Rows where 'A' > 2:
   A  B   C
3  4  8  13
4  5  9  14


In [10]:
import pandas as pd
# Start from a fresh two-column frame
df = pd.DataFrame(dict(
    A=[1, 2, 2, 4, 5],
    B=[5, 6, 6, 8, 9],
))
# A scalar is broadcast to every row, producing a constant column 'C'
df = df.assign(C=10)
print("DataFrame with new column 'C':")
print(df)

DataFrame with new column 'C':
   A  B   C
0  1  5  10
1  2  6  10
2  2  6  10
3  4  8  10
4  5  9  10


In [11]:
# Derive column 'D' as the element-wise sum of columns 'A' and 'B'
df = df.assign(D=df['A'] + df['B'])
print("\nDataFrame with new column 'D' (A + B):")
print(df)


DataFrame with new column 'D' (A + B):
   A  B   C   D
0  1  5  10   6
1  2  6  10   8
2  2  6  10   8
3  4  8  10  12
4  5  9  10  14


In [14]:
# Remove column 'C'; drop returns a new frame, which is bound back to df
df = df.drop('C', axis='columns')
print("\nDataFrame after dropping column 'C':")
print(df)


DataFrame after dropping column 'C':
   A  B   D
0  1  5   6
1  2  6   8
2  2  6   8
3  4  8  12
4  5  9  14


In [16]:
# Overwrite column 'A' with one new value per row (the list length must match the row count)
df = df.assign(A=[10, 20, 30, 40, 50])
print("\nDataFrame after updating column 'A':")
print(df)


DataFrame after updating column 'A':
    A  B   D
0  10  5   6
1  20  6   8
2  30  6   8
3  40  8  12
4  50  9  14


In [17]:
# Update column 'B' by doubling its values.
# Multiplying the Series directly is vectorized; .apply(lambda x: x * 2) would call
# a Python function once per element to produce the same values.
df['B'] = df['B'] * 2
print("\nDataFrame after updating column 'B' with a function (doubling the values):")
print(df)


DataFrame after updating column 'B' with a function (doubling the values):
    A   B   D
0  10  10   6
1  20  12   8
2  30  12   8
3  40  16  12
4  50  18  14


In [18]:
# Conditional assignment through .loc: rows where 'B' exceeds 10 get 99 in column 'A'
b_above_10 = df['B'] > 10
df.loc[b_above_10, 'A'] = 99
print("\nDataFrame after conditional update of column 'A':")
print(df)


DataFrame after conditional update of column 'A':
    A   B   D
0  10  10   6
1  99  12   8
2  99  12   8
3  99  16  12
4  99  18  14


In [19]:
import pandas as pd
# Small frame for the index examples
df = pd.DataFrame(dict(A=[1, 2, 3], B=[4, 5, 6]))
# Promote column 'A' to the row index; it stops being a regular column
df = df.set_index('A')
print("DataFrame with 'A' as index:")
print(df)

DataFrame with 'A' as index:
   B
A   
1  4
2  5
3  6


In [20]:
# Replace the index wholesale by assigning one new label per row
new_labels = ['X', 'Y', 'Z']
df.index = new_labels
print("\nDataFrame with new index:")
print(df)


DataFrame with new index:
   B
X  4
Y  5
Z  6


In [21]:
# reset_index turns the current row labels into an ordinary column (named 'index'
# because the index has no name) and restores the default 0..n-1 index
df = df.reset_index()
print("\nDataFrame with reset index:")
print(df)


DataFrame with reset index:
  index  B
0     X  4
1     Y  5
2     Z  6


In [22]:
# drop=True discards the current index instead of inserting it as a column.
# The index here is already the default 0..n-1, so the frame is unchanged and the
# 'index' column created by the previous reset is still present.
df = df.reset_index(drop=True)
print("\nDataFrame with index removed:")
print(df)


DataFrame with index removed:
  index  B
0     X  4
1     Y  5
2     Z  6


In [23]:
import pandas as pd
# Small frame for the arithmetic examples
df = pd.DataFrame(dict(A=[1, 2, 3], B=[4, 5, 6]))
# Addition: C is the element-wise sum A + B
df = df.assign(C=df['A'] + df['B'])
print("Addition:")
print(df)

Addition:
   A  B  C
0  1  4  5
1  2  5  7
2  3  6  9


In [25]:
# Subtraction: D is the element-wise difference A - B
df = df.assign(D=df['A'] - df['B'])
print(df)

   A  B  C  D
0  1  4  5 -3
1  2  5  7 -3
2  3  6  9 -3


In [29]:
# Multiplication: E is the element-wise product A * B
df = df.assign(E=df['A'] * df['B'])
print(df)

   A  B  C  D   E     F
0  1  4  5 -3   4  0.25
1  2  5  7 -3  10  0.40
2  3  6  9 -3  18  0.50


In [28]:
# Division: F is the element-wise quotient A / B (true division, so the result is float)
df = df.assign(F=df['A'] / df['B'])
print(df)

   A  B  C  D   E     F
0  1  4  5 -3   4  0.25
1  2  5  7 -3  10  0.40
2  3  6  9 -3  18  0.50


In [2]:
import pandas as pd
# Sample DataFrame on the default RangeIndex (row labels 0..4)
df = pd.DataFrame({
    'A': [1, 2, 2, 4, 5],
    'B': [5, 6, 6, 8, 9],
    'C': [10, 11, 11, 13, 14]})
# .loc's first selector addresses ROW labels, so df.loc["A"] raises KeyError here
# (no row is labelled "A"). Select column 'A' with an explicit all-rows slice instead.
df.loc[:, "A"]
df