#### Creating a Pandas Dataframe with Duplicate Elements
Create a sample Pandas dataframe with a dictionary of lists, say columns names are A, B, C, D, and E with duplicate elements.

In [2]:
import pandas as pd
 
# create a dictionary with five fields each
data = {
    'A': ['A1', 'A2', 'A3', 'A4', 'A5'],
    'B': ['B1', 'B2', 'B3', 'B4', 'B4'],
    'C': ['C1', 'C2', 'C3', 'C3', 'C3'],
    'D': ['D1', 'D2', 'D2', 'D2', 'D2'],
    'E': ['E1', 'E1', 'E1', 'E1', 'E1']}
 
# Convert the dictionary into DataFrame
df = pd.DataFrame(data)
df

Unnamed: 0,A,B,C,D,E
0,A1,B1,C1,D1,E1
1,A2,B2,C2,D2,E1
2,A3,B3,C3,D2,E1
3,A4,B4,C3,D2,E1
4,A5,B4,C3,D2,E1


#### Get the Unique Values of ‘B’ Column

In [4]:
df['B'].value_counts()

B
B4    2
B1    1
B2    1
B3    1
Name: count, dtype: int64

In [11]:
li=[]
for i in range(df.shape[0]):
    if(df['B'][i] not in li):
        li.append(df['B'][i])
li

['B1', 'B2', 'B3', 'B4']

### pd.unique(values)
values-->1-D array like--->return numpy array of unique values from that provided 1-D array like 

In [15]:
pd.unique(df['B'])

array(['B1', 'B2', 'B3', 'B4'], dtype=object)

#### Get the Unique Values of Pandas in ‘E’ Column

In [16]:
pd.unique(df['E'])

array(['E1'], dtype=object)

#### Get Number of Unique Values in a Column

In [23]:
df

Unnamed: 0,A,B,C,D,E
0,A1,B1,C1,D1,E1
1,A2,B2,C2,D2,E1
2,A3,B3,C3,D2,E1
3,A4,B4,C3,D2,E1
4,A5,B4,C3,D2,E1


### df.nunique(axis=0,dropna=True)
gives counting of unique value along specified axis
if axis=0 column ke unique values ka count
if axis=1 row ke unique values ka count

In [24]:
df.nunique()

A    5
B    4
C    3
D    2
E    1
dtype: int64

In [25]:
df.nunique(axis=1)

0    5
1    5
2    5
3    5
4    5
dtype: int64

#### the number of unique values in the ‘C’ column, excluding NaN values

In [26]:
df['C'].nunique()

3

In [27]:
pd.unique(df['C'])

array(['C1', 'C2', 'C3'], dtype=object)

In [28]:
len(pd.unique(df['C']))

3

### Eliminate Duplicate Values from a Column using set()

In [29]:
set(df['C'])

{'C1', 'C2', 'C3'}

In [43]:
df

Unnamed: 0,A,B,C,D,E
0,A1,B1,C1,D1,E1
1,A2,B2,C2,D2,E1
2,A3,B3,C3,D2,E1
3,A4,B4,C3,D2,E1
4,A5,B4,C3,D2,E1


## Using Series.drop_duplicates()

In [44]:
df['B'].drop_duplicates()

0    B1
1    B2
2    B3
3    B4
Name: B, dtype: object

### Using pandas.concat() and Unique() Methods

In [30]:
df

Unnamed: 0,A,B,C,D,E
0,A1,B1,C1,D1,E1
1,A2,B2,C2,D2,E1
2,A3,B3,C3,D2,E1
3,A4,B4,C3,D2,E1
4,A5,B4,C3,D2,E1


In [42]:
pd.concat([df[col].unique() for col in df.columns])

TypeError: cannot concatenate object of type '<class 'numpy.ndarray'>'; only Series and DataFrame objs are valid

In [4]:
import pandas as pd 
  
gapminder_csv_url ='http://bit.ly/2cLzoxH'
# load the data with pd.read_csv 
record = pd.read_csv('../DataSetGFG/gapminder_csv_url.csv') 
  
record.head()

Unnamed: 0,country,year,pop,continent,lifeExp,gdpPercap
0,Afghanistan,1952,8425333.0,Asia,28.801,779.445314
1,Afghanistan,1957,9240934.0,Asia,30.332,820.85303
2,Afghanistan,1962,10267083.0,Asia,31.997,853.10071
3,Afghanistan,1967,11537966.0,Asia,34.02,836.197138
4,Afghanistan,1972,13079460.0,Asia,36.088,739.981106


### Select the continent column from the record and apply the unique function to get the values as we want.

In [5]:
record['continent'].unique()

array(['Asia', 'Europe', 'Africa', 'Americas', 'Oceania'], dtype=object)

#### Select unique values from the countrycolumn.

In [9]:
record.country.unique()

array(['Afghanistan', 'Albania', 'Algeria', 'Angola', 'Argentina',
       'Australia', 'Austria', 'Bahrain', 'Bangladesh', 'Belgium',
       'Benin', 'Bolivia', 'Bosnia and Herzegovina', 'Botswana', 'Brazil',
       'Bulgaria', 'Burkina Faso', 'Burundi', 'Cambodia', 'Cameroon',
       'Canada', 'Central African Republic', 'Chad', 'Chile', 'China',
       'Colombia', 'Comoros', 'Congo Dem. Rep.', 'Congo Rep.',
       'Costa Rica', "Cote d'Ivoire", 'Croatia', 'Cuba', 'Czech Republic',
       'Denmark', 'Djibouti', 'Dominican Republic', 'Ecuador', 'Egypt',
       'El Salvador', 'Equatorial Guinea', 'Eritrea', 'Ethiopia',
       'Finland', 'France', 'Gabon', 'Gambia', 'Germany', 'Ghana',
       'Greece', 'Guatemala', 'Guinea', 'Guinea-Bissau', 'Haiti',
       'Honduras', 'Hong Kong China', 'Hungary', 'Iceland', 'India',
       'Indonesia', 'Iran', 'Iraq', 'Ireland', 'Israel', 'Italy',
       'Jamaica', 'Japan', 'Jordan', 'Kenya', 'Korea Dem. Rep.',
       'Korea Rep.', 'Kuwait', 'Lebanon',

#### In this method you can see that we use the dataframe inside the unique function as parameter although we select the same column as above so we get the same output.

In [7]:
pd.unique(record['continent'])

array(['Asia', 'Europe', 'Africa', 'Americas', 'Oceania'], dtype=object)

In [8]:
pd.unique(record.continent)

array(['Asia', 'Europe', 'Africa', 'Americas', 'Oceania'], dtype=object)