In [2]:
import pandas as pd
import numpy as np
import sys

#### Checking Versions

In [3]:
print(f"Pandas Version: {pd.__version__}")
print(f"Numpy Version: {np.__version__}")
print(f"Sys Version: {sys.version}")

Pandas Version: 2.3.2
Numpy Version: 2.2.6
Sys Version: 3.13.5 (main, Jun 25 2025, 18:55:22) [GCC 14.2.0]


In [4]:
pd.describe_option()

compute.use_bottleneck : bool
    Use the bottleneck library to accelerate if it is installed,
    the default is True
    Valid values: False,True
    [default: True] [currently: True]
compute.use_numba : bool
    Use the numba engine option for select operations if it is installed,
    the default is False
    Valid values: False,True
    [default: False] [currently: False]
compute.use_numexpr : bool
    Use the numexpr library to accelerate computation if it is installed,
    the default is True
    Valid values: False,True
    [default: True] [currently: True]
display.chop_threshold : float or None
    if set to a float value, all float values smaller than the given threshold
    will be displayed as exactly 0 by repr and friends.
    [default: None] [currently: None]
display.colheader_justify : 'left'/'right'
    Controls the justification of column headers. used by DataFrameFormatter.
    [default: right] [currently: right]
display.date_dayfirst : boolean
    When True, prints an

#### Set Display Options for Better UX

In [5]:
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 50)
pd.set_option('display.width', 1000)
pd.set_option('display.precision', 3)

In [5]:
print(pd.show_versions(as_json=False))


INSTALLED VERSIONS
------------------
commit                : 4665c10899bc413b639194f6fb8665a5c70f7db5
python                : 3.13.5
python-bits           : 64
OS                    : Linux
OS-release            : 6.12.57+deb13-amd64
Version               : #1 SMP PREEMPT_DYNAMIC Debian 6.12.57-1 (2025-11-05)
machine               : x86_64
processor             : 
byteorder             : little
LC_ALL                : None
LANG                  : en_US.UTF-8
LOCALE                : en_US.UTF-8

pandas                : 2.3.2
numpy                 : 2.2.6
pytz                  : 2025.2
dateutil              : 2.9.0.post0
pip                   : 25.1.1
Cython                : None
sphinx                : None
IPython               : 9.5.0
adbc-driver-postgresql: None
adbc-driver-sqlite    : None
bs4                   : 4.13.5
blosc                 : None
bottleneck            : None
dataframe-api-compat  : None
fastparquet           : None
fsspec                : 2025.9.0
html5lib      

In [6]:
def optmzeConfig():
    confg_chnges = {
        'display.max_rows' : 50,
        'display.max_columns' : 20,
        'display.float_format' : '${:,.2f}'.format,
        'mode.chained_assignment' : 'warn',
        'compute.use_bottleneck' : True,
        'compute.use_numexpr' : True,
        'display.precision' : 6,
        'io.hdf.default_format' : 'table',
    }
    print("Applying Optimization Settings....")
    for opt, val in confg_chnges.items():
        try:
            old_val = pd.get_option(opt)
        except Exception:
            old_val = 'N/A'
        pd.set_option(opt, val)
        print(f" {opt:35} {str(old_val):20} -> {str(val):20}")
    
    # Backup the options we changed (safe iteration over our dict keys)
    with open('pandas_config.txt', 'w') as f:
        for opt in confg_chnges.keys():
            try:
                f.write(f"{opt}: {pd.get_option(opt)}\n")
            except Exception:
                pass
    print("\n Configuration Optimized and Backed UP")

optmzeConfig()

Applying Optimization Settings....
 display.max_rows                    60                   -> 50                  
 display.max_columns                 20                   -> 20                  
 display.float_format                None                 -> <built-in method format of str object at 0x7ff1a5a84770>
 mode.chained_assignment             warn                 -> warn                
 compute.use_bottleneck              True                 -> True                
 compute.use_numexpr                 True                 -> True                
 display.precision                   6                    -> 6                   
 io.hdf.default_format               None                 -> table               

 Configuration Optimized and Backed UP


In [7]:
def crtrndmSt():
    np.random.seed(98)
    depts = [
        "Engineering",
        'Sales',
        "Marketing",
        "HR",
        "Finance",
        "Product"
    ]

    dta = {
        'employee_id' : range(1000, 1100),
        'name' : [f"Employee{i}" for i in range(100)],
        'department' : np.random.choice(depts, 100),
        'salary' : np.random.normal(75000, 20000, 100).astype(int),
        'years_experince' : np.random.randint(0,30,100),
        'performance_rating' : np.random.uniform(1,5,100).round(1)
    }
    df = pd.DataFrame(dta)
    df.loc[np.random.choice(100,5), 'salary'] = np.nan
    df.loc[np.random.choice(100,3), 'department'] = None
    return df

compDf = crtrndmSt()
print(f"Shape of the Dataset : {compDf.shape}")
print(f"Columns in the datset is : {compDf.columns}")
print(f"Types in the datset is : {compDf.dtypes}")
print(f"Memory Usage of the Dataset is : {compDf.memory_usage(deep=True).sum() / 1024:.1f} KB")
print(f"Quick Statistics : {compDf.describe()}")
print(f"First Rows / Head : {compDf.head()}")
print(compDf.isnull().sum())

Shape of the Dataset : (100, 6)
Columns in the datset is : Index(['employee_id', 'name', 'department', 'salary', 'years_experince',
       'performance_rating'],
      dtype='object')
Types in the datset is : employee_id             int64
name                   object
department             object
salary                float64
years_experince         int64
performance_rating    float64
dtype: object
Memory Usage of the Dataset is : 14.3 KB
Quick Statistics :        employee_id      salary  years_experince  performance_rating
count      $100.00      $95.00          $100.00             $100.00
mean     $1,049.50  $73,166.71           $13.58               $3.15
std         $29.01  $17,619.97            $9.28               $1.16
min      $1,000.00  $26,549.00            $0.00               $1.10
25%      $1,024.75  $61,706.50            $4.00               $2.20
50%      $1,049.50  $71,661.00           $14.00               $3.10
75%      $1,074.25  $87,414.00           $21.50              