In [None]:
# Passing by value
#
# `y = x` makes y refer to the same int as x, but `y = 10` then rebinds the
# name y to a different object. Nothing is mutated, so x still refers to 2.

x = 2
y = x
y = 10
print(f"x = {x}", f"y = {y}", sep="\n")

In [None]:
# Passing by reference
#
# `y = x` does not copy the list: both names refer to one list object.
# Assigning to y[0] mutates that shared object, so x shows the change too.

x = [2, 2]
y = x
y[0] = 10
print(f"x = {x}", f"y = {y}", sep="\n")

In [None]:
# Make a copy to force passing by value
#
# x.copy() builds a new list with the same elements, so mutating y
# leaves the original list x untouched.

x = [2, 2]
y = x.copy()
y[0] = 10
print(f"x = {x}", f"y = {y}", sep="\n")

In [None]:
# Same issue when altering within a function
#
# The parameter `arg` is just another name for the caller's list, so an
# in-place change inside the function is visible to the caller as well.

def func(arg):
    """Set the first element of ``arg`` to 10 in place and return ``arg`` itself."""
    arg[0] = 10
    return arg


x = [2, 2]
y = func(x)
print(f"x = {x}", f"y = {y}", sep="\n")

In [None]:
# Again, make a copy
#
# Working on a copy inside the function keeps the caller's list unchanged.

def func(arg):
    """Return a copy of ``arg`` with its first element set to 10; ``arg`` is not modified."""
    duplicate = arg.copy()
    duplicate[0] = 10
    return duplicate


x = [2, 2]
y = func(x)
print(f"x = {x}", f"y = {y}", sep="\n")

In [None]:
# Local variable
#
# Names assigned inside a function exist only while the function runs.

def func(arg):
    """Return a copy of ``arg`` with its first element set to 10; ``arg`` is not modified."""
    new_var = arg.copy()  # new_var is local to func
    new_var[0] = 10
    return new_var

x = [2, 2]
y = func(x)

# new_var was never defined at notebook level, so looking it up here raises
# NameError. The error is the point of this cell: catch it and display the
# message so the notebook still runs from top to bottom.
try:
    print(f"new_var = {new_var}")
except NameError as err:
    print(f"NameError: {err}")

In [None]:
# Local variable again
#
# Assigning to x inside func creates a separate local x; the notebook-level
# x defined below is neither read nor changed by the function.

x = 2


def func(arg):
    """Multiply ``arg`` by a local ``x`` that is always 10."""
    x = 10  # local name; shadows the outer x only inside this function
    return x * arg


y = func(3)

print(f"x = {x}", f"y = {y}", sep="\n")

In [None]:
# Global variable
#
# func never assigns to x, so the lookup of x inside it falls through to
# the notebook-level (global) x.

x = 2


def func(arg):
    """Multiply ``arg`` by the global ``x`` as it is at call time."""
    return x * arg


y = func(3)

print(f"x = {x}", f"y = {y}", sep="\n")

In [None]:
# Local variable one more time
#
# Because func assigns to x, Python treats x as local for the WHOLE function
# body. The right-hand side `2*x` therefore reads the local x before it has a
# value, which raises UnboundLocalError instead of using the outer x.

x = [2, 2]

def func(arg):
    """Try to double ``x`` and repeat it ``arg`` times; always raises UnboundLocalError."""
    x = 2*x  # reads local x before assignment -> UnboundLocalError
    return x*arg

# The exception is the demonstration: catch it and display the message so the
# notebook still runs from top to bottom.
try:
    y = func(3)
except UnboundLocalError as err:
    print(f"UnboundLocalError: {err}")
else:
    print(f"y = {y}")

# The outer x is untouched either way.
print(f"x = {x}")

### Summary

Be careful of two things:

1.  If you pass a list, dictionary, NumPy array, pandas Series, or pandas DataFrame to a function and the function modifies that argument in place, then the object is also modified outside the function, because the argument and the outside variable refer to the same object.  So, make a copy if you don't want the outside version to be modified - or just don't change the dictionary, array, ... inside the function in the first place.

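2.  If you intend to use a local variable inside a function but forget to define it, and a variable with the same name exists outside the function, then the outside variable will be used in the function.  However, if the function assigns to that name anywhere in its body, Python treats the name as local throughout the function, and reading it before the assignment raises an `UnboundLocalError` instead of using the outside variable.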

In [None]:
# example: aggregating within bins

import numpy as np
import pandas as pd

# Seed the generator so the random columns (and the group means) are
# reproducible on every run.
np.random.seed(0)
df = pd.DataFrame(
    {
        'a': np.random.randint(0, 100, 100),
        'b': np.random.randint(0, 100, 100)
    }
)
# labels[i] names the i-th quantile bin, lowest first, so the labels must be
# an ordered sequence. A set has no defined order, so use a list.
df['grp'] = pd.qcut(df.b, 3, labels=["Lo", "Med", "Hi"])
# Mean of column a within each tercile of column b. observed=False keeps the
# current behaviour for categorical keys and avoids the pandas FutureWarning.
df.groupby('grp', observed=False).a.mean()

In [None]:
# without adding a column to the dataframe

# labels[i] names the i-th quantile bin, lowest first, so the labels must be
# an ordered sequence. A set has no defined order, so use a list.
grps = pd.qcut(df.b, 3, labels=["Lo", "Med", "Hi"])
# Group directly by the binned Series instead of a stored column.
# observed=False keeps the current behaviour for categorical keys and avoids
# the pandas FutureWarning.
df.groupby(grps, observed=False).a.mean()

In [None]:
# pandas cut and value_counts

import numpy as np
import pandas as pd

# Fix the seed so the same 15 draws come back on every run.
np.random.seed(0)
# Integers from 1 to 10 inclusive (randint's upper bound is exclusive).
x = np.random.randint(low=1, high=11, size=15)
x

In [None]:
# Two right-closed intervals, (0, 5] and (5, 11], labelled "Lo" and "Hi".
bins = pd.cut(
    x,
    bins=[0, 5, 11],
    labels=["Lo", "Hi"],
)
bins

In [None]:
# Count how many values fall under each bin label.
bins.value_counts()