## Writing to CSV

In [2]:
import pandas as pd
# Load the example data that the CSV-writing cells below operate on.
df = pd.read_csv(filepath_or_buffer='dataframe.csv')

### Wrong

In [None]:
# Anti-pattern: no `index` argument is passed, so the frame's row index is
# written to the file as an extra column alongside the data.
df.to_csv('output.csv') # creates an unnamed index column

### Right

In [None]:
# index=False leaves the row index out of the file, so only the data columns
# are written.
df.to_csv(path_or_buf='output.csv', index=False)

#### or, if the file was already written with its index column, read that column back as the index

In [None]:
# Alternative fix on the reading side: treat the first column of the file as
# the row index instead of loading it as a data column.
df = pd.read_csv(filepath_or_buffer='output.csv', index_col=[0])

## Query Method

In [None]:
# Sample frame for the query examples: five rows with an integer 'year'
# column and a single-letter 'name' column.
df = pd.DataFrame(
    data={
        'year': [1970, 2000, 2010, 2020, 2023],
        'name': ['a', 'b', 'c', 'd', 'e'],
    }
)

### Wrong


In [None]:
# Boolean-mask filtering through .loc: the frame name appears twice and the
# cutoff year is hard-coded inside the expression.
df = df.loc[df['year'] <= 2020]

### Right

In [None]:
# Keep the cutoff in a variable; the '@' prefix inside the query string makes
# query() read it from the surrounding Python scope.
example_year = 2020
df = df.query(expr='year <= @example_year')

## Vectorization

In [None]:
# Fresh sample frame for the vectorization examples: integer 'year' values
# paired with single-letter 'name' values.
df = pd.DataFrame(
    data={
        'year': [1970, 2000, 2010, 2020, 2023],
        'name': ['a', 'b', 'c', 'd', 'e'],
    }
)

### Wrong

In [None]:
# Anti-pattern: walks the frame one row at a time with iterrows() and writes
# each 'is_recent' flag through its own df.loc assignment.
for i, row in df.iterrows():
    if row['year'] > 2000:
        df.loc[i, 'is_recent'] = True
    else:
        df.loc[i, 'is_recent'] = False

#### and

In [None]:
# Anti-pattern: apply(..., axis=1) calls the Python lambda once per row just
# to square the value of a single column.
df['year_square'] = df.apply(lambda row: row['year'] ** 2, axis = 1)

### Right

In [None]:
# One vectorized comparison over the whole 'year' column yields the boolean
# 'is_recent' flags in a single step.
df['is_recent'] = df['year'].gt(2000)

#### and

In [None]:
# Square the whole 'year' column at once instead of row by row.
df['year_square'] = df['year'].pow(2)

## Copy when working with a subset of a DataFrame

In [3]:
# Sample frame for the copy examples: five integer years, each paired with a
# single-letter name.
df = pd.DataFrame(
    data={
        'year': [1970, 2000, 2010, 2020, 2023],
        'name': ['a', 'b', 'c', 'd', 'e'],
    }
)

### Wrong

In [6]:
# Anti-pattern: df_old is taken straight from query(), and adding a column to
# it produces the "set on a copy of a slice" warning shown under this cell.
df_old = df.query('year < 2000')
df_old['century'] = df_old['year'] // 100 # just an example, not perfect

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_old['century'] = df_old['year'] // 100 # just an example, not perfect


### Right

In [11]:
# Take an explicit copy of the filtered rows so the new column is added to an
# independent frame.
df_old = df.query(expr='year < 2000').copy(deep=True)
# Century number: subtract 1, floor-divide by 100, then add 1.
df_old['century'] = df_old['year'].sub(1).floordiv(100).add(1)

## String methods

In [13]:
# Sample frame for the string-method examples; 'name' holds the lowercase
# letters that the cells below upper-case.
df = pd.DataFrame(
    data={
        'year': [1970, 2000, 2010, 2020, 2023],
        'name': ['a', 'b', 'c', 'd', 'e'],
    }
)

### Wrong

In [None]:
# Anti-pattern: apply() runs the Python lambda once per element; str(x) also
# converts every value to text before upper-casing it.
df['uppercase'] = df['name'].apply(lambda x: str(x).upper())

### Right

In [None]:
# The .str accessor upper-cases the whole 'name' column in one call, with no
# per-element Python lambda.
df['uppercase'] = df['name'].str.upper()