Skip to content

Commit

Permalink
Add helper method to read encoding from shapefile .cpg or LDID information
Browse files Browse the repository at this point in the history

Ported from the GDAL logic, which unfortunately is not available
for re-use directly from GDAL.
  • Loading branch information
nyalldawson committed Feb 12, 2020
1 parent 550c8bf commit bb71b71
Show file tree
Hide file tree
Showing 42 changed files with 210 additions and 0 deletions.
153 changes: 153 additions & 0 deletions src/core/qgsogrutils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@
#include <QUuid>
#include <cpl_error.h>
#include <QJsonDocument>
#include <QFileInfo>
#include <QDir>
#include <QTextStream>
#include <QDataStream>

#include "ogr_srs_api.h"

Expand Down Expand Up @@ -744,3 +748,152 @@ QgsCoordinateReferenceSystem QgsOgrUtils::OGRSpatialReferenceToCrs( OGRSpatialRe

return QgsCoordinateReferenceSystem::fromWkt( wkt );
}

// Converts the (non-empty, whitespace-trimmed) first line of a shapefile .cpg file
// to an encoding name.
//
// Ported from OGRShapeLayer::ConvertCodePage
// https://github.com/OSGeo/gdal/blob/master/gdal/ogr/ogrsf_frmts/shape/ogrshapelayer.cpp#L342
static QString shapefileEncodingFromCpg( const QString &cpgString )
{
  // purely numeric values within the known code page ranges map to "CP<number>"
  bool ok = false;
  const int cpgCodePage = cpgString.toInt( &ok );
  if ( ok && ( ( cpgCodePage >= 437 && cpgCodePage <= 950 )
               || ( cpgCodePage >= 1250 && cpgCodePage <= 1258 ) ) )
  {
    return QStringLiteral( "CP%1" ).arg( cpgCodePage );
  }

  if ( cpgString.startsWith( QLatin1String( "8859" ) ) )
  {
    // both the "8859-1" and the "88591" spellings are accepted
    const bool hasDash = cpgString.length() > 4 && cpgString.at( 4 ) == QLatin1Char( '-' );
    return QStringLiteral( "ISO-8859-%1" ).arg( cpgString.mid( hasDash ? 5 : 4 ) );
  }

  if ( cpgString.startsWith( QLatin1String( "UTF-8" ), Qt::CaseInsensitive )
       || cpgString.startsWith( QLatin1String( "UTF8" ), Qt::CaseInsensitive ) )
  {
    return QStringLiteral( "UTF-8" );
  }

  if ( cpgString.startsWith( QLatin1String( "ANSI 1251" ), Qt::CaseInsensitive ) )
    return QStringLiteral( "CP1251" );

  // unrecognised value: hand it back unchanged and let the caller try it as an encoding name
  return cpgString;
}

// Converts the LDID value read from a DBF header to an encoding name.
// Returns an empty string when the LDID value is not in the table below.
static QString shapefileEncodingFromLdid( quint8 ldid )
{
  int nCP = -1; // Windows code page.

  // http://www.autopark.ru/ASBProgrammerGuide/DBFSTRUC.HTM
  switch ( ldid )
  {
    case 1: nCP = 437; break;
    case 2: nCP = 850; break;
    case 3: nCP = 1252; break;
    case 4: nCP = 10000; break;
    case 8: nCP = 865; break;
    case 10: nCP = 850; break;
    case 11: nCP = 437; break;
    case 13: nCP = 437; break;
    case 14: nCP = 850; break;
    case 15: nCP = 437; break;
    case 16: nCP = 850; break;
    case 17: nCP = 437; break;
    case 18: nCP = 850; break;
    case 19: nCP = 932; break;
    case 20: nCP = 850; break;
    case 21: nCP = 437; break;
    case 22: nCP = 850; break;
    case 23: nCP = 865; break;
    case 24: nCP = 437; break;
    case 25: nCP = 437; break;
    case 26: nCP = 850; break;
    case 27: nCP = 437; break;
    case 28: nCP = 863; break;
    case 29: nCP = 850; break;
    case 31: nCP = 852; break;
    case 34: nCP = 852; break;
    case 35: nCP = 852; break;
    case 36: nCP = 860; break;
    case 37: nCP = 850; break;
    case 38: nCP = 866; break;
    case 55: nCP = 850; break;
    case 64: nCP = 852; break;
    case 77: nCP = 936; break;
    case 78: nCP = 949; break;
    case 79: nCP = 950; break;
    case 80: nCP = 874; break;
    case 87: return QStringLiteral( "ISO-8859-1" ); // the only entry which is not a "CP<number>" encoding
    case 88: nCP = 1252; break;
    case 89: nCP = 1252; break;
    case 100: nCP = 852; break;
    case 101: nCP = 866; break;
    case 102: nCP = 865; break;
    case 103: nCP = 861; break;
    case 104: nCP = 895; break;
    case 105: nCP = 620; break;
    case 106: nCP = 737; break;
    case 107: nCP = 857; break;
    case 108: nCP = 863; break;
    case 120: nCP = 950; break;
    case 121: nCP = 949; break;
    case 122: nCP = 936; break;
    case 123: nCP = 932; break;
    case 124: nCP = 874; break;
    case 134: nCP = 737; break;
    case 135: nCP = 852; break;
    case 136: nCP = 857; break;
    case 150: nCP = 10007; break;
    case 151: nCP = 10029; break;
    case 200: nCP = 1250; break;
    case 201: nCP = 1251; break;
    case 202: nCP = 1254; break;
    case 203: nCP = 1253; break;
    case 204: nCP = 1257; break;
    default: break;
  }

  return nCP != -1 ? QStringLiteral( "CP%1" ).arg( nCP ) : QString();
}

QString QgsOgrUtils::readShapefileEncoding( const QString &path )
{
  // unfortunately OGR's routines for calculating the shapefile encoding aren't exposed anywhere in the GDAL c api, so
  // re-implement them here...
  //
  // Lookup order:
  //  1. first line of the .cpg file next to the shapefile
  //  2. LDID byte stored in the header of the .dbf file
  // An empty string is returned when neither source yields an encoding.
  if ( !QFileInfo::exists( path ) )
    return QString();

  const QFileInfo fi( path );
  const QString baseName = fi.completeBaseName();
  // sidecar files are looked up with an upper case extension only when the shapefile itself uses ".SHP"
  const bool upperCaseSuffix = fi.suffix() == QLatin1String( "SHP" );

  // first try to read cpg file, if present (opening a missing file simply fails)
  const QString cpgPath = fi.dir().filePath( QStringLiteral( "%1.%2" ).arg( baseName, upperCaseSuffix ? QStringLiteral( "CPG" ) : QStringLiteral( "cpg" ) ) );
  QFile cpgFile( cpgPath );
  if ( cpgFile.open( QIODevice::ReadOnly ) )
  {
    QTextStream cpgStream( &cpgFile );
    // trim, so that stray whitespace around the value neither leaks into the returned
    // encoding name nor defeats the prefix checks
    const QString cpgString = cpgStream.readLine().trimmed();
    if ( !cpgString.isEmpty() )
      return shapefileEncodingFromCpg( cpgString );
  }

  // fallback to LDID value, read from DBF file
  const QString dbfPath = fi.dir().filePath( QStringLiteral( "%1.%2" ).arg( baseName, upperCaseSuffix ? QStringLiteral( "DBF" ) : QStringLiteral( "dbf" ) ) );
  QFile dbfFile( dbfPath );
  // the LDID is the single byte which follows the first 29 bytes of the file
  if ( dbfFile.open( QIODevice::ReadOnly ) && dbfFile.skip( 29 ) == 29 )
  {
    QDataStream dbfIn( &dbfFile );
    dbfIn.setByteOrder( QDataStream::LittleEndian );
    quint8 ldid = 0;
    dbfIn >> ldid;
    // a truncated file leaves the stream in an error state - don't interpret the value then
    if ( dbfIn.status() == QDataStream::Ok )
      return shapefileEncodingFromLdid( ldid );
  }

  // files are closed by the QFile destructors
  return QString();
}
8 changes: 8 additions & 0 deletions src/core/qgsogrutils.h
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,14 @@ class CORE_EXPORT QgsOgrUtils
* \since QGIS 3.10.1
*/
static QgsCoordinateReferenceSystem OGRSpatialReferenceToCrs( OGRSpatialReferenceH srs );

/**
* Reads the encoding of the shapefile at the specified \a path (where \a path is the
* location of the ".shp" file).
*
* The encoding is taken from the first line of the accompanying ".cpg" file when that file
* exists and the line is not empty, otherwise from the LDID value stored in the
* accompanying ".dbf" file.
*
* \returns the encoding name, or an empty string if \a path does not exist or if no
* encoding could be determined from either file
*
* \since QGIS 3.12
*/
static QString readShapefileEncoding( const QString &path );
};

#endif // QGSOGRUTILS_H
31 changes: 31 additions & 0 deletions tests/src/python/test_provider_shapefile.py
Original file line number Diff line number Diff line change
Expand Up @@ -625,6 +625,37 @@ def testOpenWithFilter(self):
# force close of data provider
vl.setDataSource('', 'test', 'ogr')

def testEncoding(self):
    """Check the encoding reported by the data provider for each encoding test shapefile."""
    # (shapefile name inside the 'shapefile' test data folder, expected provider encoding)
    expected_encodings = (
        ('iso-8859-1.shp', 'ISO-8859-1'),
        ('iso-8859-1_ldid.shp', 'ISO-8859-1'),
        ('latin1.shp', 'ISO-8859-1'),
        ('utf8.shp', 'UTF-8'),
        ('windows-1252.shp', 'windows-1252'),
        ('windows-1252_ldid.shp', 'windows-1252'),
    )

    for shp_name, expected in expected_encodings:
        layer = QgsVectorLayer(os.path.join(TEST_DATA_DIR, 'shapefile', shp_name))
        self.assertTrue(layer.isValid())
        self.assertEqual(layer.dataProvider().encoding(), expected)


def testCreateAttributeIndex(self):
tmpdir = tempfile.mkdtemp()
self.dirs_to_cleanup.append(tmpdir)
Expand Down
1 change: 1 addition & 0 deletions tests/testdata/shapefile/iso-8859-1.cpg
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
88591
Binary file added tests/testdata/shapefile/iso-8859-1.dbf
Binary file not shown.
1 change: 1 addition & 0 deletions tests/testdata/shapefile/iso-8859-1.prj
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]]
1 change: 1 addition & 0 deletions tests/testdata/shapefile/iso-8859-1.qpj
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4326"]]
Binary file added tests/testdata/shapefile/iso-8859-1.shp
Binary file not shown.
Binary file added tests/testdata/shapefile/iso-8859-1.shx
Binary file not shown.
Binary file added tests/testdata/shapefile/iso-8859-1_ldid.dbf
Binary file not shown.
1 change: 1 addition & 0 deletions tests/testdata/shapefile/iso-8859-1_ldid.prj
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]]
1 change: 1 addition & 0 deletions tests/testdata/shapefile/iso-8859-1_ldid.qpj
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4326"]]
Binary file added tests/testdata/shapefile/iso-8859-1_ldid.shp
Binary file not shown.
Binary file added tests/testdata/shapefile/iso-8859-1_ldid.shx
Binary file not shown.
1 change: 1 addition & 0 deletions tests/testdata/shapefile/latin1.cpg
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
latin1
Binary file added tests/testdata/shapefile/latin1.dbf
Binary file not shown.
1 change: 1 addition & 0 deletions tests/testdata/shapefile/latin1.prj
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]]
1 change: 1 addition & 0 deletions tests/testdata/shapefile/latin1.qpj
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4326"]]
Binary file added tests/testdata/shapefile/latin1.shp
Binary file not shown.
Binary file added tests/testdata/shapefile/latin1.shx
Binary file not shown.
Binary file added tests/testdata/shapefile/system_encoding.dbf
Binary file not shown.
1 change: 1 addition & 0 deletions tests/testdata/shapefile/system_encoding.prj
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]]
1 change: 1 addition & 0 deletions tests/testdata/shapefile/system_encoding.qpj
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4326"]]
Binary file added tests/testdata/shapefile/system_encoding.shp
Binary file not shown.
Binary file added tests/testdata/shapefile/system_encoding.shx
Binary file not shown.
1 change: 1 addition & 0 deletions tests/testdata/shapefile/utf8.cpg
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
UTF-8
Binary file added tests/testdata/shapefile/utf8.dbf
Binary file not shown.
1 change: 1 addition & 0 deletions tests/testdata/shapefile/utf8.prj
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]]
1 change: 1 addition & 0 deletions tests/testdata/shapefile/utf8.qpj
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4326"]]
Binary file added tests/testdata/shapefile/utf8.shp
Binary file not shown.
Binary file added tests/testdata/shapefile/utf8.shx
Binary file not shown.
1 change: 1 addition & 0 deletions tests/testdata/shapefile/windows-1252.cpg
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1252
Binary file added tests/testdata/shapefile/windows-1252.dbf
Binary file not shown.
1 change: 1 addition & 0 deletions tests/testdata/shapefile/windows-1252.prj
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]]
1 change: 1 addition & 0 deletions tests/testdata/shapefile/windows-1252.qpj
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4326"]]
Binary file added tests/testdata/shapefile/windows-1252.shp
Binary file not shown.
Binary file added tests/testdata/shapefile/windows-1252.shx
Binary file not shown.
Binary file added tests/testdata/shapefile/windows-1252_ldid.dbf
Binary file not shown.
1 change: 1 addition & 0 deletions tests/testdata/shapefile/windows-1252_ldid.prj
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]]
1 change: 1 addition & 0 deletions tests/testdata/shapefile/windows-1252_ldid.qpj
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4326"]]
Binary file added tests/testdata/shapefile/windows-1252_ldid.shp
Binary file not shown.
Binary file added tests/testdata/shapefile/windows-1252_ldid.shx
Binary file not shown.

0 comments on commit bb71b71

Please sign in to comment.