In [2]:
import pandas as pd
import numpy as np

# Small float Series in which positions 2 and 5 are deliberately missing
s = pd.Series(data=[1, 2, np.nan, 4, 5, np.nan, 7])

s


0    1.0
1    2.0
2    NaN
3    4.0
4    5.0
5    NaN
6    7.0
dtype: float64

In [3]:
# Fill the NaN gaps; method="linear" is the pandas default, spelled out here
s_interpolated = s.interpolate(method="linear")

print(s_interpolated)

0    1.0
1    2.0
2    3.0
3    4.0
4    5.0
5    6.0
6    7.0
dtype: float64


In [5]:
# interpolate() returned a new Series, so the original `s` still holds its NaNs
s

0    1.0
1    2.0
2    NaN
3    4.0
4    5.0
5    NaN
6    7.0
dtype: float64

In [4]:
# Order-2 polynomial interpolation of the missing values
s_poly = s.interpolate(order=2, method="polynomial")
print(s_poly)


0    1.0
1    2.0
2    3.0
3    4.0
4    5.0
5    6.0
6    7.0
dtype: float64


In [11]:
# Repeat of the order-2 polynomial interpolation on the same Series
s_poly = s.interpolate(
    method="polynomial",
    order=2,
)
print(s_poly)

0    1.0
1    2.0
2    3.0
3    4.0
4    5.0
5    6.0
6    7.0
dtype: float64


In [12]:
# Replace `s` with a shorter Series: three known values and one gap
s = pd.Series(data=[1, 4, np.nan, 16])

s

0     1.0
1     4.0
2     NaN
3    16.0
dtype: float64

In [13]:
# Order-2 polynomial interpolation: the gap between 4 and 16 is filled with 9
s_poly = s.interpolate(order=2, method="polynomial")
print(s_poly)

0     1.0
1     4.0
2     9.0
3    16.0
dtype: float64


In [14]:
# Order-1 polynomial interpolation: the gap is filled with the midpoint, 10
s_poly = s.interpolate(order=1, method="polynomial")
print(s_poly)

0     1.0
1     4.0
2    10.0
3    16.0
dtype: float64


For a polynomial of degree n, you need at least n+1 non-missing data points to determine the coefficients of the polynomial.

In [16]:
# For a polynomial of degree n, you need at least n+1 non-missing data points
# to determine the coefficients of the polynomial. `s` has only 3 known values,
# so order=3 (which needs 4) cannot be fitted and the call raises.
# The error is caught and displayed so the demonstration is visible while the
# notebook still runs top to bottom.
try:
    s_poly = s.interpolate(method='polynomial', order=3)
    print(s_poly)
except ValueError as err:
    print(f"order=3 polynomial interpolation failed: {err}")

In [17]:
# Order-2 spline interpolation of the same Series
s_spline = s.interpolate(order=2, method="spline")
print(s_spline)


0     1.0
1     4.0
2     9.0
3    16.0
dtype: float64


In [19]:
# Order-1 spline interpolation; the filled value is not the plain midpoint here
s_spline = s.interpolate(
    method="spline",
    order=1,
)
print(s_spline)

0     1.000000
1     4.000000
2    10.428571
3    16.000000
dtype: float64


In [20]:
# Order-1 polynomial interpolation, shown for comparison with the order-1
# spline: this one fills the gap with 10.0. The result is stored in `s_poly`
# (not `s_spline`) so the spline result is not overwritten by a polynomial one.
s_poly = s.interpolate(method='polynomial', order=1)
print(s_poly)

0     1.0
1     4.0
2    10.0
3    16.0
dtype: float64


In [21]:
# Two float columns whose missing values sit at different row positions
df = pd.DataFrame(
    dict(
        A=[1, 2.5, np.nan, 4.5, 5.5, np.nan, 7.5],
        B=[0.5, np.nan, 2.5, 3.5, np.nan, 6.5, 7.5],
    )
)
df



Unnamed: 0,A,B
0,1.0,0.5
1,2.5,
2,,2.5
3,4.5,3.5
4,5.5,
5,,6.5
6,7.5,7.5


In [22]:
# Default (linear) interpolation of the missing values in both columns
df_interpolated = df.interpolate(method="linear")
print(df_interpolated)

     A    B
0  1.0  0.5
1  2.5  1.5
2  3.5  2.5
3  4.5  3.5
4  5.5  5.0
5  6.5  6.5
6  7.5  7.5


In [24]:
# Order-2 polynomial interpolation of the missing values in both columns
df_interpolated = df.interpolate(order=2, method="polynomial")
print(df_interpolated)

          A         B
0  1.000000  0.500000
1  2.500000  1.525641
2  3.581197  2.500000
3  4.500000  3.500000
4  5.500000  4.987179
5  6.504274  6.500000
6  7.500000  7.500000


In [25]:
# Seven consecutive daily timestamps used as the Series index
date_index = pd.date_range("2021-01-01", periods=7)
s_date = pd.Series(
    data=[1, 2, np.nan, 4, 5, np.nan, 7],
    index=date_index,
)

s_date


2021-01-01    1.0
2021-01-02    2.0
2021-01-03    NaN
2021-01-04    4.0
2021-01-05    5.0
2021-01-06    NaN
2021-01-07    7.0
Freq: D, dtype: float64

In [26]:
# Fill the gaps with method="time", which interpolates along the DatetimeIndex
s_date_interpolated = s_date.interpolate(
    method="time",
)
print(s_date_interpolated)


2021-01-01    1.0
2021-01-02    2.0
2021-01-03    3.0
2021-01-04    4.0
2021-01-05    5.0
2021-01-06    6.0
2021-01-07    7.0
Freq: D, dtype: float64


In [31]:
# Rebuild the daily-indexed Series with two missing observations
date_index = pd.date_range(periods=7, start="2021-01-01")
s_date = pd.Series(index=date_index, data=[1, 2, np.nan, 4, 5, np.nan, 7])
s_date

2021-01-01    1.0
2021-01-02    2.0
2021-01-03    NaN
2021-01-04    4.0
2021-01-05    5.0
2021-01-06    NaN
2021-01-07    7.0
Freq: D, dtype: float64

In [32]:
# Interpolate using the timestamps in the index
s_date_interpolated = s_date.interpolate(method="time")
print(s_date_interpolated)

2021-01-01    1.0
2021-01-02    2.0
2021-01-03    3.0
2021-01-04    4.0
2021-01-05    5.0
2021-01-06    6.0
2021-01-07    7.0
Freq: D, dtype: float64


In [38]:
# Daily range for 2021-01-01..2021-01-07 with 2021-01-04 removed,
# leaving an irregularly spaced index
date_index_missed = (
    pd.date_range(start="2021-01-01", end="2021-01-07")
    .drop(pd.Timestamp("2021-01-04"))
)
date_index_missed

DatetimeIndex(['2021-01-01', '2021-01-02', '2021-01-03', '2021-01-05',
               '2021-01-06', '2021-01-07'],
              dtype='datetime64[ns]', freq=None)

In [39]:
# Values aligned to the irregular index; 2021-01-03 and 2021-01-06 are missing
s_date_missed = pd.Series(
    data=[1, 2, np.nan, 5, np.nan, 7],
    index=date_index_missed,
)

s_date_missed

2021-01-01    1.0
2021-01-02    2.0
2021-01-03    NaN
2021-01-05    5.0
2021-01-06    NaN
2021-01-07    7.0
dtype: float64

In [34]:
# Interpolate along the irregular DatetimeIndex: 2021-01-03 sits one day after
# 2.0 and two days before 5.0, and is filled with 3.0
s_date_missed_interpolated = s_date_missed.interpolate(
    method="time",
)
print(s_date_missed_interpolated)

2021-01-01    1.0
2021-01-02    2.0
2021-01-03    3.0
2021-01-05    5.0
2021-01-06    6.0
2021-01-07    7.0
dtype: float64


In [27]:
# Series with a run of two consecutive NaNs followed later by a single NaN
s_limit = pd.Series(data=[1, np.nan, np.nan, 4, 5, np.nan, 7])
s_limit


0    1.0
1    NaN
2    NaN
3    4.0
4    5.0
5    NaN
6    7.0
dtype: float64

In [29]:
# limit=1 caps how many consecutive NaNs get filled: only the first NaN of the
# two-element gap is interpolated, the second one stays NaN
s_limit_interpolated = s_limit.interpolate(method="linear", limit=1)
print(s_limit_interpolated)

0    1.0
1    2.0
2    NaN
3    4.0
4    5.0
5    6.0
6    7.0
dtype: float64


In [40]:
import pandas as pd
import numpy as np

# Daily index for 2021-01-01..2021-01-07 with 2021-01-04 removed, plus the
# values observed on the remaining days (two of them missing)
date_index_missed = (
    pd.date_range(start="2021-01-01", end="2021-01-07")
    .drop(pd.Timestamp("2021-01-04"))
)
s_date_missed = pd.Series(
    data=[1, 2, np.nan, 5, np.nan, 7],
    index=date_index_missed,
)

s_date_missed



2021-01-01    1.0
2021-01-02    2.0
2021-01-03    NaN
2021-01-05    5.0
2021-01-06    NaN
2021-01-07    7.0
dtype: float64

In [43]:
# Complete daily range covering every date from 2021-01-01 to 2021-01-07
full_date_index = pd.date_range("2021-01-01", "2021-01-07")
full_date_index.to_frame()

Unnamed: 0,0
2021-01-01,2021-01-01
2021-01-02,2021-01-02
2021-01-03,2021-01-03
2021-01-04,2021-01-04
2021-01-05,2021-01-05
2021-01-06,2021-01-06
2021-01-07,2021-01-07


In [44]:
# Align the Series to the complete range; the date that was absent from the
# original index shows up as a new NaN row
s_reindexed = s_date_missed.reindex(index=full_date_index)
s_reindexed

2021-01-01    1.0
2021-01-02    2.0
2021-01-03    NaN
2021-01-04    NaN
2021-01-05    5.0
2021-01-06    NaN
2021-01-07    7.0
Freq: D, dtype: float64

In [45]:
# With every day now present in the index, fill all gaps with method="time"
s_interpolated = s_reindexed.interpolate(
    method="time",
)

print(s_interpolated)

2021-01-01    1.0
2021-01-02    2.0
2021-01-03    3.0
2021-01-04    4.0
2021-01-05    5.0
2021-01-06    6.0
2021-01-07    7.0
Freq: D, dtype: float64
